      1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // MachineScheduler schedules machine instructions after phi elimination. It
     11 // preserves LiveIntervals so it can be invoked before register allocation.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "llvm/CodeGen/MachineScheduler.h"
     16 #include "llvm/ADT/ArrayRef.h"
     17 #include "llvm/ADT/BitVector.h"
     18 #include "llvm/ADT/DenseMap.h"
     19 #include "llvm/ADT/PriorityQueue.h"
     20 #include "llvm/ADT/STLExtras.h"
     21 #include "llvm/ADT/SmallVector.h"
     22 #include "llvm/ADT/iterator_range.h"
     23 #include "llvm/Analysis/AliasAnalysis.h"
     24 #include "llvm/CodeGen/LiveInterval.h"
     25 #include "llvm/CodeGen/LiveIntervals.h"
     26 #include "llvm/CodeGen/MachineBasicBlock.h"
     27 #include "llvm/CodeGen/MachineDominators.h"
     28 #include "llvm/CodeGen/MachineFunction.h"
     29 #include "llvm/CodeGen/MachineFunctionPass.h"
     30 #include "llvm/CodeGen/MachineInstr.h"
     31 #include "llvm/CodeGen/MachineLoopInfo.h"
     32 #include "llvm/CodeGen/MachineOperand.h"
     33 #include "llvm/CodeGen/MachinePassRegistry.h"
     34 #include "llvm/CodeGen/MachineRegisterInfo.h"
     35 #include "llvm/CodeGen/Passes.h"
     36 #include "llvm/CodeGen/RegisterClassInfo.h"
     37 #include "llvm/CodeGen/RegisterPressure.h"
     38 #include "llvm/CodeGen/ScheduleDAG.h"
     39 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
     40 #include "llvm/CodeGen/ScheduleDAGMutation.h"
     41 #include "llvm/CodeGen/ScheduleDFS.h"
     42 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
     43 #include "llvm/CodeGen/SlotIndexes.h"
     44 #include "llvm/CodeGen/TargetInstrInfo.h"
     45 #include "llvm/CodeGen/TargetLowering.h"
     46 #include "llvm/CodeGen/TargetPassConfig.h"
     47 #include "llvm/CodeGen/TargetRegisterInfo.h"
     48 #include "llvm/CodeGen/TargetSchedule.h"
     49 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     50 #include "llvm/Config/llvm-config.h"
     51 #include "llvm/MC/LaneBitmask.h"
     52 #include "llvm/Pass.h"
     53 #include "llvm/Support/CommandLine.h"
     54 #include "llvm/Support/Compiler.h"
     55 #include "llvm/Support/Debug.h"
     56 #include "llvm/Support/ErrorHandling.h"
     57 #include "llvm/Support/GraphWriter.h"
     58 #include "llvm/Support/MachineValueType.h"
     59 #include "llvm/Support/raw_ostream.h"
     60 #include <algorithm>
     61 #include <cassert>
     62 #include <cstdint>
     63 #include <iterator>
     64 #include <limits>
     65 #include <memory>
     66 #include <string>
     67 #include <tuple>
     68 #include <utility>
     69 #include <vector>
     70 
     71 using namespace llvm;
     72 
     73 #define DEBUG_TYPE "machine-scheduler"
     74 
     75 namespace llvm {
     76 
     77 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
     78                            cl::desc("Force top-down list scheduling"));
     79 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
     80                             cl::desc("Force bottom-up list scheduling"));
     81 cl::opt<bool>
     82 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
     83                        cl::desc("Print critical path length to stdout"));
     84 
     85 } // end namespace llvm
     86 
     87 #ifndef NDEBUG
     88 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
     89   cl::desc("Pop up a window to show MISched dags after they are processed"));
     90 
     91 /// In some situations a few uninteresting nodes depend on nearly all other
      92 /// nodes in the graph; provide a cutoff to hide them.
     93 static cl::opt<unsigned> ViewMISchedCutoff("view-misched-cutoff", cl::Hidden,
      94   cl::desc("Hide nodes with more predecessors/successors than the cutoff"));
     95 
     96 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
     97   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
     98 
     99 static cl::opt<std::string> SchedOnlyFunc("misched-only-func", cl::Hidden,
    100   cl::desc("Only schedule this function"));
    101 static cl::opt<unsigned> SchedOnlyBlock("misched-only-block", cl::Hidden,
    102                                         cl::desc("Only schedule this MBB#"));
    103 #else
    104 static bool ViewMISchedDAGs = false;
    105 #endif // NDEBUG
    106 
    107 /// Avoid quadratic complexity in unusually large basic blocks by limiting the
    108 /// size of the ready lists.
    109 static cl::opt<unsigned> ReadyListLimit("misched-limit", cl::Hidden,
    110   cl::desc("Limit ready list to N instructions"), cl::init(256));
    111 
    112 static cl::opt<bool> EnableRegPressure("misched-regpressure", cl::Hidden,
    113   cl::desc("Enable register pressure scheduling."), cl::init(true));
    114 
    115 static cl::opt<bool> EnableCyclicPath("misched-cyclicpath", cl::Hidden,
    116   cl::desc("Enable cyclic critical path analysis."), cl::init(true));
    117 
    118 static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
    119                                         cl::desc("Enable memop clustering."),
    120                                         cl::init(true));
    121 
    122 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
    123   cl::desc("Verify machine instrs before and after machine scheduling"));
    124 
    125 // DAG subtrees must have at least this many nodes.
    126 static const unsigned MinSubtreeSize = 8;
    127 
    128 // Pin the vtables to this file.
    129 void MachineSchedStrategy::anchor() {}
    130 
    131 void ScheduleDAGMutation::anchor() {}
    132 
    133 //===----------------------------------------------------------------------===//
    134 // Machine Instruction Scheduling Pass and Registry
    135 //===----------------------------------------------------------------------===//
    136 
    137 MachineSchedContext::MachineSchedContext() {
    138   RegClassInfo = new RegisterClassInfo();
    139 }
    140 
    141 MachineSchedContext::~MachineSchedContext() {
    142   delete RegClassInfo;
    143 }
    144 
    145 namespace {
    146 
     147 /// Base class for a machine scheduler pass that can run at any point.
    148 class MachineSchedulerBase : public MachineSchedContext,
    149                              public MachineFunctionPass {
    150 public:
    151   MachineSchedulerBase(char &ID): MachineFunctionPass(ID) {}
    152 
    153   void print(raw_ostream &O, const Module* = nullptr) const override;
    154 
    155 protected:
    156   void scheduleRegions(ScheduleDAGInstrs &Scheduler, bool FixKillFlags);
    157 };
    158 
    159 /// MachineScheduler runs after coalescing and before register allocation.
    160 class MachineScheduler : public MachineSchedulerBase {
    161 public:
    162   MachineScheduler();
    163 
    164   void getAnalysisUsage(AnalysisUsage &AU) const override;
    165 
    166   bool runOnMachineFunction(MachineFunction&) override;
    167 
    168   static char ID; // Class identification, replacement for typeinfo
    169 
    170 protected:
    171   ScheduleDAGInstrs *createMachineScheduler();
    172 };
    173 
     174 /// PostMachineScheduler runs shortly before code emission.
    175 class PostMachineScheduler : public MachineSchedulerBase {
    176 public:
    177   PostMachineScheduler();
    178 
    179   void getAnalysisUsage(AnalysisUsage &AU) const override;
    180 
    181   bool runOnMachineFunction(MachineFunction&) override;
    182 
    183   static char ID; // Class identification, replacement for typeinfo
    184 
    185 protected:
    186   ScheduleDAGInstrs *createPostMachineScheduler();
    187 };
    188 
    189 } // end anonymous namespace
    190 
    191 char MachineScheduler::ID = 0;
    192 
    193 char &llvm::MachineSchedulerID = MachineScheduler::ID;
    194 
    195 INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
    196                       "Machine Instruction Scheduler", false, false)
    197 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
    198 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    199 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
    200 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
    201 INITIALIZE_PASS_END(MachineScheduler, DEBUG_TYPE,
    202                     "Machine Instruction Scheduler", false, false)
    203 
    204 MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
    205   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
    206 }
    207 
    208 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
    209   AU.setPreservesCFG();
    210   AU.addRequiredID(MachineDominatorsID);
    211   AU.addRequired<MachineLoopInfo>();
    212   AU.addRequired<AAResultsWrapperPass>();
    213   AU.addRequired<TargetPassConfig>();
    214   AU.addRequired<SlotIndexes>();
    215   AU.addPreserved<SlotIndexes>();
    216   AU.addRequired<LiveIntervals>();
    217   AU.addPreserved<LiveIntervals>();
    218   MachineFunctionPass::getAnalysisUsage(AU);
    219 }
    220 
    221 char PostMachineScheduler::ID = 0;
    222 
    223 char &llvm::PostMachineSchedulerID = PostMachineScheduler::ID;
    224 
    225 INITIALIZE_PASS(PostMachineScheduler, "postmisched",
    226                 "PostRA Machine Instruction Scheduler", false, false)
    227 
    228 PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
    229   initializePostMachineSchedulerPass(*PassRegistry::getPassRegistry());
    230 }
    231 
    232 void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
    233   AU.setPreservesCFG();
    234   AU.addRequiredID(MachineDominatorsID);
    235   AU.addRequired<MachineLoopInfo>();
    236   AU.addRequired<TargetPassConfig>();
    237   MachineFunctionPass::getAnalysisUsage(AU);
    238 }
    239 
    240 MachinePassRegistry MachineSchedRegistry::Registry;
    241 
    242 /// A dummy default scheduler factory indicates whether the scheduler
    243 /// is overridden on the command line.
    244 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
    245   return nullptr;
    246 }
    247 
    248 /// MachineSchedOpt allows command line selection of the scheduler.
    249 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
    250                RegisterPassParser<MachineSchedRegistry>>
    251 MachineSchedOpt("misched",
    252                 cl::init(&useDefaultMachineSched), cl::Hidden,
    253                 cl::desc("Machine instruction scheduler to use"));
    254 
    255 static MachineSchedRegistry
    256 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
    257                      useDefaultMachineSched);
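         // Alternative strategies plug into the same registry and can then be
         // selected with -misched=<name>. A minimal sketch, with hypothetical
         // names (FooStrategy is assumed to implement MachineSchedStrategy):
         //
         //   static ScheduleDAGInstrs *createFooSched(MachineSchedContext *C) {
         //     return new ScheduleDAGMILive(C, llvm::make_unique<FooStrategy>(C));
         //   }
         //   static MachineSchedRegistry
         //   FooSchedRegistry("foo", "Run the Foo scheduling strategy.",
         //                    createFooSched);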
    258 
    259 static cl::opt<bool> EnableMachineSched(
    260     "enable-misched",
    261     cl::desc("Enable the machine instruction scheduling pass."), cl::init(true),
    262     cl::Hidden);
    263 
    264 static cl::opt<bool> EnablePostRAMachineSched(
    265     "enable-post-misched",
    266     cl::desc("Enable the post-ra machine instruction scheduling pass."),
    267     cl::init(true), cl::Hidden);
    268 
    269 /// Decrement this iterator until reaching the top or a non-debug instr.
    270 static MachineBasicBlock::const_iterator
    271 priorNonDebug(MachineBasicBlock::const_iterator I,
    272               MachineBasicBlock::const_iterator Beg) {
    273   assert(I != Beg && "reached the top of the region, cannot decrement");
    274   while (--I != Beg) {
    275     if (!I->isDebugInstr())
    276       break;
    277   }
    278   return I;
    279 }
    280 
    281 /// Non-const version.
    282 static MachineBasicBlock::iterator
    283 priorNonDebug(MachineBasicBlock::iterator I,
    284               MachineBasicBlock::const_iterator Beg) {
    285   return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
    286       .getNonConstIterator();
    287 }
    288 
    289 /// If this iterator is a debug value, increment until reaching the End or a
    290 /// non-debug instruction.
    291 static MachineBasicBlock::const_iterator
    292 nextIfDebug(MachineBasicBlock::const_iterator I,
    293             MachineBasicBlock::const_iterator End) {
     294   for (; I != End; ++I) {
    295     if (!I->isDebugInstr())
    296       break;
    297   }
    298   return I;
    299 }
    300 
    301 /// Non-const version.
    302 static MachineBasicBlock::iterator
    303 nextIfDebug(MachineBasicBlock::iterator I,
    304             MachineBasicBlock::const_iterator End) {
    305   return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
    306       .getNonConstIterator();
    307 }
    308 
    309 /// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
    310 ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() {
    311   // Select the scheduler, or set the default.
    312   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
    313   if (Ctor != useDefaultMachineSched)
    314     return Ctor(this);
    315 
    316   // Get the default scheduler set by the target for this function.
    317   ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this);
    318   if (Scheduler)
    319     return Scheduler;
    320 
    321   // Default to GenericScheduler.
    322   return createGenericSchedLive(this);
    323 }
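         // Targets typically provide the scheduler consulted above by overriding
         // TargetPassConfig::createMachineScheduler(). A hedged sketch of such an
         // override (the Foo* names are hypothetical, not defined in LLVM):
         //
         //   ScheduleDAGInstrs *
         //   FooPassConfig::createMachineScheduler(MachineSchedContext *C) const {
         //     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
         //     DAG->addMutation(createFooClusteringDAGMutation());
         //     return DAG;
         //   }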
    324 
    325 /// Instantiate a ScheduleDAGInstrs for PostRA scheduling that will be owned by
    326 /// the caller. We don't have a command line option to override the postRA
    327 /// scheduler. The Target must configure it.
    328 ScheduleDAGInstrs *PostMachineScheduler::createPostMachineScheduler() {
    329   // Get the postRA scheduler set by the target for this function.
    330   ScheduleDAGInstrs *Scheduler = PassConfig->createPostMachineScheduler(this);
    331   if (Scheduler)
    332     return Scheduler;
    333 
    334   // Default to GenericScheduler.
    335   return createGenericSchedPostRA(this);
    336 }
    337 
    338 /// Top-level MachineScheduler pass driver.
    339 ///
    340 /// Visit blocks in function order. Divide each block into scheduling regions
    341 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
    342 /// consistent with the DAG builder, which traverses the interior of the
    343 /// scheduling regions bottom-up.
    344 ///
    345 /// This design avoids exposing scheduling boundaries to the DAG builder,
    346 /// simplifying the DAG builder's support for "special" target instructions.
    347 /// At the same time the design allows target schedulers to operate across
    348 /// scheduling boundaries, for example to bundle the boundary instructions
    349 /// without reordering them. This creates complexity, because the target
    350 /// scheduler must update the RegionBegin and RegionEnd positions cached by
    351 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
    352 /// design would be to split blocks at scheduling boundaries, but LLVM has a
    353 /// general bias against block splitting purely for implementation simplicity.
    354 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
    355   if (skipFunction(mf.getFunction()))
    356     return false;
    357 
    358   if (EnableMachineSched.getNumOccurrences()) {
    359     if (!EnableMachineSched)
    360       return false;
    361   } else if (!mf.getSubtarget().enableMachineScheduler())
    362     return false;
    363 
    364   LLVM_DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
    365 
    366   // Initialize the context of the pass.
    367   MF = &mf;
    368   MLI = &getAnalysis<MachineLoopInfo>();
    369   MDT = &getAnalysis<MachineDominatorTree>();
    370   PassConfig = &getAnalysis<TargetPassConfig>();
    371   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    372 
    373   LIS = &getAnalysis<LiveIntervals>();
    374 
    375   if (VerifyScheduling) {
    376     LLVM_DEBUG(LIS->dump());
    377     MF->verify(this, "Before machine scheduling.");
    378   }
    379   RegClassInfo->runOnMachineFunction(*MF);
    380 
    381   // Instantiate the selected scheduler for this target, function, and
    382   // optimization level.
    383   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
    384   scheduleRegions(*Scheduler, false);
    385 
    386   LLVM_DEBUG(LIS->dump());
    387   if (VerifyScheduling)
    388     MF->verify(this, "After machine scheduling.");
    389   return true;
    390 }
    391 
    392 bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
    393   if (skipFunction(mf.getFunction()))
    394     return false;
    395 
    396   if (EnablePostRAMachineSched.getNumOccurrences()) {
    397     if (!EnablePostRAMachineSched)
    398       return false;
    399   } else if (!mf.getSubtarget().enablePostRAScheduler()) {
    400     LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n");
    401     return false;
    402   }
    403   LLVM_DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs()));
    404 
    405   // Initialize the context of the pass.
    406   MF = &mf;
    407   MLI = &getAnalysis<MachineLoopInfo>();
    408   PassConfig = &getAnalysis<TargetPassConfig>();
    409 
    410   if (VerifyScheduling)
    411     MF->verify(this, "Before post machine scheduling.");
    412 
    413   // Instantiate the selected scheduler for this target, function, and
    414   // optimization level.
    415   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
    416   scheduleRegions(*Scheduler, true);
    417 
    418   if (VerifyScheduling)
    419     MF->verify(this, "After post machine scheduling.");
    420   return true;
    421 }
    422 
     423 /// Return true if the given instruction should not be included in a scheduling
    424 /// region.
    425 ///
    426 /// MachineScheduler does not currently support scheduling across calls. To
    427 /// handle calls, the DAG builder needs to be modified to create register
    428 /// anti/output dependencies on the registers clobbered by the call's regmask
    429 /// operand. In PreRA scheduling, the stack pointer adjustment already prevents
     430 /// scheduling across calls. In PostRA scheduling, the isCall check is needed to
     431 /// enforce the boundary, but there would be no benefit to scheduling across
     432 /// calls this late anyway.
    433 static bool isSchedBoundary(MachineBasicBlock::iterator MI,
    434                             MachineBasicBlock *MBB,
    435                             MachineFunction *MF,
    436                             const TargetInstrInfo *TII) {
    437   return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF);
    438 }
    439 
    440 /// A region of an MBB for scheduling.
    441 namespace {
    442 struct SchedRegion {
    443   /// RegionBegin is the first instruction in the scheduling region, and
    444   /// RegionEnd is either MBB->end() or the scheduling boundary after the
    445   /// last instruction in the scheduling region. These iterators cannot refer
    446   /// to instructions outside of the identified scheduling region because
    447   /// those may be reordered before scheduling this region.
    448   MachineBasicBlock::iterator RegionBegin;
    449   MachineBasicBlock::iterator RegionEnd;
    450   unsigned NumRegionInstrs;
    451 
    452   SchedRegion(MachineBasicBlock::iterator B, MachineBasicBlock::iterator E,
    453               unsigned N) :
    454     RegionBegin(B), RegionEnd(E), NumRegionInstrs(N) {}
    455 };
    456 } // end anonymous namespace
    457 
    458 using MBBRegionsVector = SmallVector<SchedRegion, 16>;
    459 
    460 static void
    461 getSchedRegions(MachineBasicBlock *MBB,
    462                 MBBRegionsVector &Regions,
    463                 bool RegionsTopDown) {
    464   MachineFunction *MF = MBB->getParent();
    465   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
    466 
    467   MachineBasicBlock::iterator I = nullptr;
     468   for (MachineBasicBlock::iterator RegionEnd = MBB->end();
    469       RegionEnd != MBB->begin(); RegionEnd = I) {
    470 
    471     // Avoid decrementing RegionEnd for blocks with no terminator.
    472     if (RegionEnd != MBB->end() ||
    473         isSchedBoundary(&*std::prev(RegionEnd), &*MBB, MF, TII)) {
    474       --RegionEnd;
    475     }
    476 
    477     // The next region starts above the previous region. Look backward in the
    478     // instruction stream until we find the nearest boundary.
    479     unsigned NumRegionInstrs = 0;
    480     I = RegionEnd;
     481     for (; I != MBB->begin(); --I) {
    482       MachineInstr &MI = *std::prev(I);
    483       if (isSchedBoundary(&MI, &*MBB, MF, TII))
    484         break;
    485       if (!MI.isDebugInstr())
    486         // MBB::size() uses instr_iterator to count. Here we need a bundle to
    487         // count as a single instruction.
    488         ++NumRegionInstrs;
    489     }
    490 
    491     Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
    492   }
    493 
    494   if (RegionsTopDown)
    495     std::reverse(Regions.begin(), Regions.end());
    496 }
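         // For illustration only (a hypothetical block, not from a real test):
         // given a block containing a call, the loop above yields two regions,
         // and the boundary instructions themselves are never reordered:
         //
         //   %1 = ...      \  region [%1, CALL): two schedulable instructions
         //   %2 = ...      /
         //   CALL @fn         boundary (isSchedBoundary is true for calls)
         //   %3 = ...         region [%3, RET): one instruction, later skipped
         //   RET              as trivially small; RET is itself a boundary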
    497 
    498 /// Main driver for both MachineScheduler and PostMachineScheduler.
    499 void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
    500                                            bool FixKillFlags) {
    501   // Visit all machine basic blocks.
    502   //
    503   // TODO: Visit blocks in global postorder or postorder within the bottom-up
    504   // loop tree. Then we can optionally compute global RegPressure.
    505   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
    506        MBB != MBBEnd; ++MBB) {
    507 
    508     Scheduler.startBlock(&*MBB);
    509 
    510 #ifndef NDEBUG
    511     if (SchedOnlyFunc.getNumOccurrences() && SchedOnlyFunc != MF->getName())
    512       continue;
    513     if (SchedOnlyBlock.getNumOccurrences()
    514         && (int)SchedOnlyBlock != MBB->getNumber())
    515       continue;
    516 #endif
    517 
    518     // Break the block into scheduling regions [I, RegionEnd). RegionEnd
    519     // points to the scheduling boundary at the bottom of the region. The DAG
    520     // does not include RegionEnd, but the region does (i.e. the next
    521     // RegionEnd is above the previous RegionBegin). If the current block has
    522     // no terminator then RegionEnd == MBB->end() for the bottom region.
    523     //
     524     // All the regions of MBB are found first and stored in MBBRegions, which
     525     // are then processed (within the MBB) top-down when requested by the scheduler.
    526     //
    527     // The Scheduler may insert instructions during either schedule() or
    528     // exitRegion(), even for empty regions. So the local iterators 'I' and
    529     // 'RegionEnd' are invalid across these calls. Instructions must not be
     530     // added to regions other than the current one without updating MBBRegions.
    531 
    532     MBBRegionsVector MBBRegions;
    533     getSchedRegions(&*MBB, MBBRegions, Scheduler.doMBBSchedRegionsTopDown());
    534     for (MBBRegionsVector::iterator R = MBBRegions.begin();
    535          R != MBBRegions.end(); ++R) {
    536       MachineBasicBlock::iterator I = R->RegionBegin;
    537       MachineBasicBlock::iterator RegionEnd = R->RegionEnd;
    538       unsigned NumRegionInstrs = R->NumRegionInstrs;
    539 
    540       // Notify the scheduler of the region, even if we may skip scheduling
    541       // it. Perhaps it still needs to be bundled.
    542       Scheduler.enterRegion(&*MBB, I, RegionEnd, NumRegionInstrs);
    543 
    544       // Skip empty scheduling regions (0 or 1 schedulable instructions).
    545       if (I == RegionEnd || I == std::prev(RegionEnd)) {
    546         // Close the current region. Bundle the terminator if needed.
    547         // This invalidates 'RegionEnd' and 'I'.
    548         Scheduler.exitRegion();
    549         continue;
    550       }
    551       LLVM_DEBUG(dbgs() << "********** MI Scheduling **********\n");
    552       LLVM_DEBUG(dbgs() << MF->getName() << ":" << printMBBReference(*MBB)
    553                         << " " << MBB->getName() << "\n  From: " << *I
    554                         << "    To: ";
    555                  if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
    556                  else dbgs() << "End";
    557                  dbgs() << " RegionInstrs: " << NumRegionInstrs << '\n');
    558       if (DumpCriticalPathLength) {
    559         errs() << MF->getName();
    560         errs() << ":%bb. " << MBB->getNumber();
    561         errs() << " " << MBB->getName() << " \n";
    562       }
    563 
    564       // Schedule a region: possibly reorder instructions.
    565       // This invalidates the original region iterators.
    566       Scheduler.schedule();
    567 
    568       // Close the current region.
    569       Scheduler.exitRegion();
    570     }
    571     Scheduler.finishBlock();
    572     // FIXME: Ideally, no further passes should rely on kill flags. However,
    573     // thumb2 size reduction is currently an exception, so the PostMIScheduler
    574     // needs to do this.
    575     if (FixKillFlags)
    576       Scheduler.fixupKills(*MBB);
    577   }
    578   Scheduler.finalizeSchedule();
    579 }
    580 
    581 void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const {
    582   // unimplemented
    583 }
    584 
    585 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    586 LLVM_DUMP_METHOD void ReadyQueue::dump() const {
    587   dbgs() << "Queue " << Name << ": ";
    588   for (const SUnit *SU : Queue)
    589     dbgs() << SU->NodeNum << " ";
    590   dbgs() << "\n";
    591 }
    592 #endif
    593 
    594 //===----------------------------------------------------------------------===//
    595 // ScheduleDAGMI - Basic machine instruction scheduling. This is
    596 // independent of PreRA/PostRA scheduling and involves no extra book-keeping for
    597 // virtual registers.
     598 //===----------------------------------------------------------------------===//
    599 
    600 // Provide a vtable anchor.
    601 ScheduleDAGMI::~ScheduleDAGMI() = default;
    602 
    603 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
    604   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
    605 }
    606 
    607 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
    608   if (SuccSU != &ExitSU) {
     609     // Do not use WillCreateCycle; it assumes SD scheduling.
    610     // If Pred is reachable from Succ, then the edge creates a cycle.
    611     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
    612       return false;
    613     Topo.AddPred(SuccSU, PredDep.getSUnit());
    614   }
    615   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
    616   // Return true regardless of whether a new edge needed to be inserted.
    617   return true;
    618 }
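         // canAddEdge/addEdge are the hooks a ScheduleDAGMutation uses to add
         // artificial edges after the DAG is built, e.g. to keep two memory ops
         // adjacent. A minimal sketch of a mutation's apply() (the mutation class
         // and the SUa/SUb nodes are hypothetical placeholders):
         //
         //   void FooClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
         //     ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI *>(DAGInstrs);
         //     if (DAG->canAddEdge(SUb, SUa))
         //       DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
         //   }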
    619 
    620 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
    621 /// NumPredsLeft reaches zero, release the successor node.
    622 ///
    623 /// FIXME: Adjust SuccSU height based on MinLatency.
    624 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
    625   SUnit *SuccSU = SuccEdge->getSUnit();
    626 
    627   if (SuccEdge->isWeak()) {
    628     --SuccSU->WeakPredsLeft;
    629     if (SuccEdge->isCluster())
    630       NextClusterSucc = SuccSU;
    631     return;
    632   }
    633 #ifndef NDEBUG
    634   if (SuccSU->NumPredsLeft == 0) {
    635     dbgs() << "*** Scheduling failed! ***\n";
    636     SuccSU->dump(this);
    637     dbgs() << " has been released too many times!\n";
    638     llvm_unreachable(nullptr);
    639   }
    640 #endif
    641   // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However,
    642   // CurrCycle may have advanced since then.
    643   if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency())
    644     SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency();
    645 
    646   --SuccSU->NumPredsLeft;
    647   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    648     SchedImpl->releaseTopNode(SuccSU);
    649 }
    650 
    651 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
    652 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
    653   for (SDep &Succ : SU->Succs)
    654     releaseSucc(SU, &Succ);
    655 }
    656 
    657 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
    658 /// NumSuccsLeft reaches zero, release the predecessor node.
    659 ///
    660 /// FIXME: Adjust PredSU height based on MinLatency.
    661 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
    662   SUnit *PredSU = PredEdge->getSUnit();
    663 
    664   if (PredEdge->isWeak()) {
    665     --PredSU->WeakSuccsLeft;
    666     if (PredEdge->isCluster())
    667       NextClusterPred = PredSU;
    668     return;
    669   }
    670 #ifndef NDEBUG
    671   if (PredSU->NumSuccsLeft == 0) {
    672     dbgs() << "*** Scheduling failed! ***\n";
    673     PredSU->dump(this);
    674     dbgs() << " has been released too many times!\n";
    675     llvm_unreachable(nullptr);
    676   }
    677 #endif
    678   // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However,
    679   // CurrCycle may have advanced since then.
    680   if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency())
    681     PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency();
    682 
    683   --PredSU->NumSuccsLeft;
    684   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    685     SchedImpl->releaseBottomNode(PredSU);
    686 }
    687 
    688 /// releasePredecessors - Call releasePred on each of SU's predecessors.
    689 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
    690   for (SDep &Pred : SU->Preds)
    691     releasePred(SU, &Pred);
    692 }
    693 
    694 void ScheduleDAGMI::startBlock(MachineBasicBlock *bb) {
    695   ScheduleDAGInstrs::startBlock(bb);
    696   SchedImpl->enterMBB(bb);
    697 }
    698 
    699 void ScheduleDAGMI::finishBlock() {
    700   SchedImpl->leaveMBB();
    701   ScheduleDAGInstrs::finishBlock();
    702 }
    703 
    704 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
    705 /// crossing a scheduling boundary. [begin, end) includes all instructions in
    706 /// the region, including the boundary itself and single-instruction regions
    707 /// that don't get scheduled.
    708 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
    709                                      MachineBasicBlock::iterator begin,
    710                                      MachineBasicBlock::iterator end,
    711                                      unsigned regioninstrs)
    712 {
    713   ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs);
    714 
    715   SchedImpl->initPolicy(begin, end, regioninstrs);
    716 }
    717 
    718 /// This is normally called from the main scheduler loop but may also be invoked
    719 /// by the scheduling strategy to perform additional code motion.
    720 void ScheduleDAGMI::moveInstruction(
    721   MachineInstr *MI, MachineBasicBlock::iterator InsertPos) {
    722   // Advance RegionBegin if the first instruction moves down.
    723   if (&*RegionBegin == MI)
    724     ++RegionBegin;
    725 
    726   // Update the instruction stream.
    727   BB->splice(InsertPos, BB, MI);
    728 
    729   // Update LiveIntervals
    730   if (LIS)
    731     LIS->handleMove(*MI, /*UpdateFlags=*/true);
    732 
    733   // Recede RegionBegin if an instruction moves above the first.
    734   if (RegionBegin == InsertPos)
    735     RegionBegin = MI;
    736 }
    737 
    738 bool ScheduleDAGMI::checkSchedLimit() {
    739 #ifndef NDEBUG
    740   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    741     CurrentTop = CurrentBottom;
    742     return false;
    743   }
    744   ++NumInstrsScheduled;
    745 #endif
    746   return true;
    747 }
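         // NumInstrsScheduled is checked against the -misched-cutoff option
         // declared above (assertions-enabled builds only). A usage sketch for
         // bisecting a suspect scheduling decision:
         //
         //   llc -misched-cutoff=16 ...   (stop scheduling after 16 instructions)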
    748 
    749 /// Per-region scheduling driver, called back from
    750 /// MachineScheduler::runOnMachineFunction. This is a simplified driver that
    751 /// does not consider liveness or register pressure. It is useful for PostRA
    752 /// scheduling and potentially other custom schedulers.
    753 void ScheduleDAGMI::schedule() {
    754   LLVM_DEBUG(dbgs() << "ScheduleDAGMI::schedule starting\n");
    755   LLVM_DEBUG(SchedImpl->dumpPolicy());
    756 
    757   // Build the DAG.
    758   buildSchedGraph(AA);
    759 
    760   Topo.InitDAGTopologicalSorting();
    761 
    762   postprocessDAG();
    763 
    764   SmallVector<SUnit*, 8> TopRoots, BotRoots;
    765   findRootsAndBiasEdges(TopRoots, BotRoots);
    766 
    767   LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
    768              for (const SUnit &SU
    769                   : SUnits) SU.dumpAll(this);
    770              if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
    771   if (ViewMISchedDAGs) viewGraph();
    772 
    773   // Initialize the strategy before modifying the DAG.
    774   // This may initialize a DFSResult to be used for queue priority.
    775   SchedImpl->initialize(this);
    776 
    777   // Initialize ready queues now that the DAG and priority data are finalized.
    778   initQueues(TopRoots, BotRoots);
    779 
    780   bool IsTopNode = false;
    781   while (true) {
    782     LLVM_DEBUG(dbgs() << "** ScheduleDAGMI::schedule picking next node\n");
    783     SUnit *SU = SchedImpl->pickNode(IsTopNode);
    784     if (!SU) break;
    785 
    786     assert(!SU->isScheduled && "Node already scheduled");
    787     if (!checkSchedLimit())
    788       break;
    789 
    790     MachineInstr *MI = SU->getInstr();
    791     if (IsTopNode) {
    792       assert(SU->isTopReady() && "node still has unscheduled dependencies");
    793       if (&*CurrentTop == MI)
    794         CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    795       else
    796         moveInstruction(MI, CurrentTop);
    797     } else {
    798       assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    799       MachineBasicBlock::iterator priorII =
    800         priorNonDebug(CurrentBottom, CurrentTop);
    801       if (&*priorII == MI)
    802         CurrentBottom = priorII;
    803       else {
    804         if (&*CurrentTop == MI)
    805           CurrentTop = nextIfDebug(++CurrentTop, priorII);
    806         moveInstruction(MI, CurrentBottom);
    807         CurrentBottom = MI;
    808       }
    809     }
    810     // Notify the scheduling strategy before updating the DAG.
    811     // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues
    812     // runs, it can then use the accurate ReadyCycle time to determine whether
    813     // newly released nodes can move to the readyQ.
    814     SchedImpl->schedNode(SU, IsTopNode);
    815 
    816     updateQueues(SU, IsTopNode);
    817   }
    818   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
    819 
    820   placeDebugValues();
    821 
    822   LLVM_DEBUG({
    823     dbgs() << "*** Final schedule for "
    824            << printMBBReference(*begin()->getParent()) << " ***\n";
    825     dumpSchedule();
    826     dbgs() << '\n';
    827   });
    828 }
    829 
    830 /// Apply each ScheduleDAGMutation step in order.
    831 void ScheduleDAGMI::postprocessDAG() {
    832   for (auto &m : Mutations)
    833     m->apply(this);
    834 }
    835 
    836 void ScheduleDAGMI::
    837 findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
    838                       SmallVectorImpl<SUnit*> &BotRoots) {
    839   for (SUnit &SU : SUnits) {
    840     assert(!SU.isBoundaryNode() && "Boundary node should not be in SUnits");
    841 
    842     // Order predecessors so DFSResult follows the critical path.
    843     SU.biasCriticalPath();
    844 
    845     // A SUnit is ready to top schedule if it has no predecessors.
    846     if (!SU.NumPredsLeft)
    847       TopRoots.push_back(&SU);
    848     // A SUnit is ready to bottom schedule if it has no successors.
    849     if (!SU.NumSuccsLeft)
    850       BotRoots.push_back(&SU);
    851   }
    852   ExitSU.biasCriticalPath();
    853 }
    854 
    855 /// Identify DAG roots and setup scheduler queues.
    856 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
    857                                ArrayRef<SUnit*> BotRoots) {
    858   NextClusterSucc = nullptr;
    859   NextClusterPred = nullptr;
    860 
    861   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
    862   //
    863   // Nodes with unreleased weak edges can still be roots.
    864   // Release top roots in forward order.
    865   for (SUnit *SU : TopRoots)
    866     SchedImpl->releaseTopNode(SU);
    867 
    868   // Release bottom roots in reverse order so the higher priority nodes appear
    869   // first. This is more natural and slightly more efficient.
    870   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
    871          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    872     SchedImpl->releaseBottomNode(*I);
    873   }
    874 
    875   releaseSuccessors(&EntrySU);
    876   releasePredecessors(&ExitSU);
    877 
    878   SchedImpl->registerRoots();
    879 
    880   // Advance past initial DebugValues.
    881   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
    882   CurrentBottom = RegionEnd;
    883 }
    884 
    885 /// Update scheduler queues after scheduling an instruction.
    886 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
    887   // Release dependent instructions for scheduling.
    888   if (IsTopNode)
    889     releaseSuccessors(SU);
    890   else
    891     releasePredecessors(SU);
    892 
    893   SU->isScheduled = true;
    894 }
    895 
    896 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
    897 void ScheduleDAGMI::placeDebugValues() {
    898   // If first instruction was a DBG_VALUE then put it back.
    899   if (FirstDbgValue) {
    900     BB->splice(RegionBegin, BB, FirstDbgValue);
    901     RegionBegin = FirstDbgValue;
    902   }
    903 
    904   for (std::vector<std::pair<MachineInstr *, MachineInstr *>>::iterator
    905          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    906     std::pair<MachineInstr *, MachineInstr *> P = *std::prev(DI);
    907     MachineInstr *DbgValue = P.first;
    908     MachineBasicBlock::iterator OrigPrevMI = P.second;
    909     if (&*RegionBegin == DbgValue)
    910       ++RegionBegin;
    911     BB->splice(++OrigPrevMI, BB, DbgValue);
    912     if (OrigPrevMI == std::prev(RegionEnd))
    913       RegionEnd = DbgValue;
    914   }
    915   DbgValues.clear();
    916   FirstDbgValue = nullptr;
    917 }
    918 
    919 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    920 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
    921   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    922     if (SUnit *SU = getSUnit(&(*MI)))
    923       SU->dump(this);
    924     else
    925       dbgs() << "Missing SUnit\n";
    926   }
    927 }
    928 #endif
    929 
    930 //===----------------------------------------------------------------------===//
    931 // ScheduleDAGMILive - Base class for MachineInstr scheduling with LiveIntervals
    932 // preservation.
    933 //===----------------------------------------------------------------------===//
    934 
    935 ScheduleDAGMILive::~ScheduleDAGMILive() {
    936   delete DFSResult;
    937 }
    938 
    939 void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
    940   const MachineInstr &MI = *SU.getInstr();
    941   for (const MachineOperand &MO : MI.operands()) {
    942     if (!MO.isReg())
    943       continue;
    944     if (!MO.readsReg())
    945       continue;
    946     if (TrackLaneMasks && !MO.isUse())
    947       continue;
    948 
    949     unsigned Reg = MO.getReg();
    950     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    951       continue;
    952 
    953     // Ignore re-defs.
    954     if (TrackLaneMasks) {
    955       bool FoundDef = false;
    956       for (const MachineOperand &MO2 : MI.operands()) {
    957         if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
    958           FoundDef = true;
    959           break;
    960         }
    961       }
    962       if (FoundDef)
    963         continue;
    964     }
    965 
    966     // Record this local VReg use.
    967     VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
    968     for (; UI != VRegUses.end(); ++UI) {
    969       if (UI->SU == &SU)
    970         break;
    971     }
    972     if (UI == VRegUses.end())
    973       VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
    974   }
    975 }
    976 
    977 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
    978 /// crossing a scheduling boundary. [begin, end) includes all instructions in
    979 /// the region, including the boundary itself and single-instruction regions
    980 /// that don't get scheduled.
    981 void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
    982                                 MachineBasicBlock::iterator begin,
    983                                 MachineBasicBlock::iterator end,
    984                                 unsigned regioninstrs)
    985 {
    986   // ScheduleDAGMI initializes SchedImpl's per-region policy.
    987   ScheduleDAGMI::enterRegion(bb, begin, end, regioninstrs);
    988 
    989   // For convenience remember the end of the liveness region.
    990   LiveRegionEnd = (RegionEnd == bb->end()) ? RegionEnd : std::next(RegionEnd);
    991 
    992   SUPressureDiffs.clear();
    993 
    994   ShouldTrackPressure = SchedImpl->shouldTrackPressure();
    995   ShouldTrackLaneMasks = SchedImpl->shouldTrackLaneMasks();
    996 
    997   assert((!ShouldTrackLaneMasks || ShouldTrackPressure) &&
    998          "ShouldTrackLaneMasks requires ShouldTrackPressure");
    999 }
   1000 
    1001 // Set up the register pressure trackers for the top scheduled and bottom
    1002 // scheduled regions.
   1003 void ScheduleDAGMILive::initRegPressure() {
   1004   VRegUses.clear();
   1005   VRegUses.setUniverse(MRI.getNumVirtRegs());
   1006   for (SUnit &SU : SUnits)
   1007     collectVRegUses(SU);
   1008 
   1009   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
   1010                     ShouldTrackLaneMasks, false);
   1011   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
   1012                     ShouldTrackLaneMasks, false);
   1013 
   1014   // Close the RPTracker to finalize live ins.
   1015   RPTracker.closeRegion();
   1016 
   1017   LLVM_DEBUG(RPTracker.dump());
   1018 
   1019   // Initialize the live ins and live outs.
   1020   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
   1021   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
   1022 
   1023   // Close one end of the tracker so we can call
   1024   // getMaxUpward/DownwardPressureDelta before advancing across any
   1025   // instructions. This converts currently live regs into live ins/outs.
   1026   TopRPTracker.closeTop();
   1027   BotRPTracker.closeBottom();
   1028 
   1029   BotRPTracker.initLiveThru(RPTracker);
   1030   if (!BotRPTracker.getLiveThru().empty()) {
   1031     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
   1032     LLVM_DEBUG(dbgs() << "Live Thru: ";
   1033                dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
    1034   }
   1035 
   1036   // For each live out vreg reduce the pressure change associated with other
   1037   // uses of the same vreg below the live-out reaching def.
   1038   updatePressureDiffs(RPTracker.getPressure().LiveOutRegs);
   1039 
   1040   // Account for liveness generated by the region boundary.
   1041   if (LiveRegionEnd != RegionEnd) {
   1042     SmallVector<RegisterMaskPair, 8> LiveUses;
   1043     BotRPTracker.recede(&LiveUses);
   1044     updatePressureDiffs(LiveUses);
   1045   }
   1046 
   1047   LLVM_DEBUG(dbgs() << "Top Pressure:\n";
   1048              dumpRegSetPressure(TopRPTracker.getRegSetPressureAtPos(), TRI);
   1049              dbgs() << "Bottom Pressure:\n";
   1050              dumpRegSetPressure(BotRPTracker.getRegSetPressureAtPos(), TRI););
   1051 
   1052   assert((BotRPTracker.getPos() == RegionEnd ||
   1053           (RegionEnd->isDebugInstr() &&
   1054            BotRPTracker.getPos() == priorNonDebug(RegionEnd, RegionBegin))) &&
   1055          "Can't find the region bottom");
   1056 
   1057   // Cache the list of excess pressure sets in this region. This will also track
   1058   // the max pressure in the scheduled code for these sets.
   1059   RegionCriticalPSets.clear();
   1060   const std::vector<unsigned> &RegionPressure =
   1061     RPTracker.getPressure().MaxSetPressure;
   1062   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
   1063     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
   1064     if (RegionPressure[i] > Limit) {
   1065       LLVM_DEBUG(dbgs() << TRI->getRegPressureSetName(i) << " Limit " << Limit
   1066                         << " Actual " << RegionPressure[i] << "\n");
   1067       RegionCriticalPSets.push_back(PressureChange(i));
   1068     }
   1069   }
   1070   LLVM_DEBUG(dbgs() << "Excess PSets: ";
   1071              for (const PressureChange &RCPS
   1072                   : RegionCriticalPSets) dbgs()
   1073              << TRI->getRegPressureSetName(RCPS.getPSet()) << " ";
   1074              dbgs() << "\n");
   1075 }
   1076 
   1077 void ScheduleDAGMILive::
   1078 updateScheduledPressure(const SUnit *SU,
   1079                         const std::vector<unsigned> &NewMaxPressure) {
   1080   const PressureDiff &PDiff = getPressureDiff(SU);
   1081   unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size();
   1082   for (const PressureChange &PC : PDiff) {
   1083     if (!PC.isValid())
   1084       break;
   1085     unsigned ID = PC.getPSet();
   1086     while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID)
   1087       ++CritIdx;
   1088     if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) {
   1089       if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc()
   1090           && NewMaxPressure[ID] <= (unsigned)std::numeric_limits<int16_t>::max())
   1091         RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]);
   1092     }
   1093     unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID);
   1094     if (NewMaxPressure[ID] >= Limit - 2) {
   1095       LLVM_DEBUG(dbgs() << "  " << TRI->getRegPressureSetName(ID) << ": "
   1096                         << NewMaxPressure[ID]
   1097                         << ((NewMaxPressure[ID] > Limit) ? " > " : " <= ")
   1098                         << Limit << "(+ " << BotRPTracker.getLiveThru()[ID]
   1099                         << " livethru)\n");
   1100     }
   1101   }
   1102 }
   1103 
   1104 /// Update the PressureDiff array for liveness after scheduling this
   1105 /// instruction.
   1106 void ScheduleDAGMILive::updatePressureDiffs(
   1107     ArrayRef<RegisterMaskPair> LiveUses) {
   1108   for (const RegisterMaskPair &P : LiveUses) {
   1109     unsigned Reg = P.RegUnit;
   1110     /// FIXME: Currently assuming single-use physregs.
   1111     if (!TRI->isVirtualRegister(Reg))
   1112       continue;
   1113 
   1114     if (ShouldTrackLaneMasks) {
   1115       // If the register has just become live then other uses won't change
   1116       // this fact anymore => decrement pressure.
   1117       // If the register has just become dead then other uses make it come
   1118       // back to life => increment pressure.
   1119       bool Decrement = P.LaneMask.any();
   1120 
   1121       for (const VReg2SUnit &V2SU
   1122            : make_range(VRegUses.find(Reg), VRegUses.end())) {
   1123         SUnit &SU = *V2SU.SU;
   1124         if (SU.isScheduled || &SU == &ExitSU)
   1125           continue;
   1126 
   1127         PressureDiff &PDiff = getPressureDiff(&SU);
   1128         PDiff.addPressureChange(Reg, Decrement, &MRI);
   1129         LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU.NodeNum << ") "
   1130                           << printReg(Reg, TRI) << ':'
   1131                           << PrintLaneMask(P.LaneMask) << ' ' << *SU.getInstr();
   1132                    dbgs() << "              to "; PDiff.dump(*TRI););
   1133       }
   1134     } else {
   1135       assert(P.LaneMask.any());
   1136       LLVM_DEBUG(dbgs() << "  LiveReg: " << printVRegOrUnit(Reg, TRI) << "\n");
   1137       // This may be called before CurrentBottom has been initialized. However,
   1138       // BotRPTracker must have a valid position. We want the value live into the
   1139       // instruction or live out of the block, so ask for the previous
   1140       // instruction's live-out.
   1141       const LiveInterval &LI = LIS->getInterval(Reg);
   1142       VNInfo *VNI;
   1143       MachineBasicBlock::const_iterator I =
   1144         nextIfDebug(BotRPTracker.getPos(), BB->end());
   1145       if (I == BB->end())
   1146         VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
   1147       else {
   1148         LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*I));
   1149         VNI = LRQ.valueIn();
   1150       }
   1151       // RegisterPressureTracker guarantees that readsReg is true for LiveUses.
   1152       assert(VNI && "No live value at use.");
   1153       for (const VReg2SUnit &V2SU
   1154            : make_range(VRegUses.find(Reg), VRegUses.end())) {
   1155         SUnit *SU = V2SU.SU;
   1156         // If this use comes before the reaching def, it cannot be a last use,
   1157         // so decrease its pressure change.
   1158         if (!SU->isScheduled && SU != &ExitSU) {
   1159           LiveQueryResult LRQ =
   1160               LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
   1161           if (LRQ.valueIn() == VNI) {
   1162             PressureDiff &PDiff = getPressureDiff(SU);
   1163             PDiff.addPressureChange(Reg, true, &MRI);
   1164             LLVM_DEBUG(dbgs() << "  UpdateRegP: SU(" << SU->NodeNum << ") "
   1165                               << *SU->getInstr();
   1166                        dbgs() << "              to "; PDiff.dump(*TRI););
   1167           }
   1168         }
   1169       }
   1170     }
   1171   }
   1172 }
   1173 
   1174 /// schedule - Called back from MachineScheduler::runOnMachineFunction
   1175 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
   1176 /// only includes instructions that have DAG nodes, not scheduling boundaries.
   1177 ///
   1178 /// This is a skeletal driver, with all the functionality pushed into helpers,
   1179 /// so that it can be easily extended by experimental schedulers. Generally,
   1180 /// implementing MachineSchedStrategy should be sufficient to implement a new
   1181 /// scheduling algorithm. However, if a scheduler further subclasses
   1182 /// ScheduleDAGMILive then it will want to override this virtual method in order
   1183 /// to update any specialized state.
   1184 void ScheduleDAGMILive::schedule() {
   1185   LLVM_DEBUG(dbgs() << "ScheduleDAGMILive::schedule starting\n");
   1186   LLVM_DEBUG(SchedImpl->dumpPolicy());
   1187   buildDAGWithRegPressure();
   1188 
   1189   Topo.InitDAGTopologicalSorting();
   1190 
   1191   postprocessDAG();
   1192 
   1193   SmallVector<SUnit*, 8> TopRoots, BotRoots;
   1194   findRootsAndBiasEdges(TopRoots, BotRoots);
   1195 
   1196   // Initialize the strategy before modifying the DAG.
   1197   // This may initialize a DFSResult to be used for queue priority.
   1198   SchedImpl->initialize(this);
   1199 
   1200   LLVM_DEBUG(if (EntrySU.getInstr() != nullptr) EntrySU.dumpAll(this);
   1201              for (const SUnit &SU
   1202                   : SUnits) {
   1203                SU.dumpAll(this);
   1204                if (ShouldTrackPressure) {
   1205                  dbgs() << "  Pressure Diff      : ";
   1206                  getPressureDiff(&SU).dump(*TRI);
   1207                }
   1208                dbgs() << "  Single Issue       : ";
   1209                if (SchedModel.mustBeginGroup(SU.getInstr()) &&
   1210                    SchedModel.mustEndGroup(SU.getInstr()))
   1211                  dbgs() << "true;";
   1212                else
   1213                  dbgs() << "false;";
   1214                dbgs() << '\n';
   1215              } if (ExitSU.getInstr() != nullptr) ExitSU.dumpAll(this););
   1216   if (ViewMISchedDAGs) viewGraph();
   1217 
   1218   // Initialize ready queues now that the DAG and priority data are finalized.
   1219   initQueues(TopRoots, BotRoots);
   1220 
   1221   bool IsTopNode = false;
   1222   while (true) {
   1223     LLVM_DEBUG(dbgs() << "** ScheduleDAGMILive::schedule picking next node\n");
   1224     SUnit *SU = SchedImpl->pickNode(IsTopNode);
   1225     if (!SU) break;
   1226 
   1227     assert(!SU->isScheduled && "Node already scheduled");
   1228     if (!checkSchedLimit())
   1229       break;
   1230 
   1231     scheduleMI(SU, IsTopNode);
   1232 
   1233     if (DFSResult) {
   1234       unsigned SubtreeID = DFSResult->getSubtreeID(SU);
   1235       if (!ScheduledTrees.test(SubtreeID)) {
   1236         ScheduledTrees.set(SubtreeID);
   1237         DFSResult->scheduleTree(SubtreeID);
   1238         SchedImpl->scheduleTree(SubtreeID);
   1239       }
   1240     }
   1241 
   1242     // Notify the scheduling strategy after updating the DAG.
   1243     SchedImpl->schedNode(SU, IsTopNode);
   1244 
   1245     updateQueues(SU, IsTopNode);
   1246   }
   1247   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
   1248 
   1249   placeDebugValues();
   1250 
   1251   LLVM_DEBUG({
   1252     dbgs() << "*** Final schedule for "
   1253            << printMBBReference(*begin()->getParent()) << " ***\n";
   1254     dumpSchedule();
   1255     dbgs() << '\n';
   1256   });
   1257 }
   1258 
    1259 /// Build the DAG and set up three register pressure trackers.
   1260 void ScheduleDAGMILive::buildDAGWithRegPressure() {
   1261   if (!ShouldTrackPressure) {
   1262     RPTracker.reset();
   1263     RegionCriticalPSets.clear();
   1264     buildSchedGraph(AA);
   1265     return;
   1266   }
   1267 
   1268   // Initialize the register pressure tracker used by buildSchedGraph.
   1269   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
   1270                  ShouldTrackLaneMasks, /*TrackUntiedDefs=*/true);
   1271 
    1272   // Account for liveness generated by the region boundary.
   1273   if (LiveRegionEnd != RegionEnd)
   1274     RPTracker.recede();
   1275 
   1276   // Build the DAG, and compute current register pressure.
   1277   buildSchedGraph(AA, &RPTracker, &SUPressureDiffs, LIS, ShouldTrackLaneMasks);
   1278 
   1279   // Initialize top/bottom trackers after computing region pressure.
   1280   initRegPressure();
   1281 }
   1282 
   1283 void ScheduleDAGMILive::computeDFSResult() {
   1284   if (!DFSResult)
   1285     DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize);
   1286   DFSResult->clear();
   1287   ScheduledTrees.clear();
   1288   DFSResult->resize(SUnits.size());
   1289   DFSResult->compute(SUnits);
   1290   ScheduledTrees.resize(DFSResult->getNumSubtrees());
   1291 }
   1292 
   1293 /// Compute the max cyclic critical path through the DAG. The scheduling DAG
   1294 /// only provides the critical path for single block loops. To handle loops that
   1295 /// span blocks, we could use the vreg path latencies provided by
   1296 /// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently
   1297 /// available for use in the scheduler.
   1298 ///
   1299 /// The cyclic path estimation identifies a def-use pair that crosses the back
   1300 /// edge and considers the depth and height of the nodes. For example, consider
   1301 /// the following instruction sequence where each instruction has unit latency
    1302 /// and defines an eponymous virtual register:
   1303 ///
   1304 /// a->b(a,c)->c(b)->d(c)->exit
   1305 ///
    1306 /// The cyclic critical path is two cycles: b->c->b
   1307 /// The acyclic critical path is four cycles: a->b->c->d->exit
   1308 /// LiveOutHeight = height(c) = len(c->d->exit) = 2
   1309 /// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3
   1310 /// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4
   1311 /// LiveInDepth = depth(b) = len(a->b) = 1
   1312 ///
   1313 /// LiveOutDepth - LiveInDepth = 3 - 1 = 2
   1314 /// LiveInHeight - LiveOutHeight = 4 - 2 = 2
   1315 /// CyclicCriticalPath = min(2, 2) = 2
   1316 ///
   1317 /// This could be relevant to PostRA scheduling, but is currently implemented
   1318 /// assuming LiveIntervals.
   1319 unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
    1320   // This only applies to single block loops.
   1321   if (!BB->isSuccessor(BB))
   1322     return 0;
   1323 
   1324   unsigned MaxCyclicLatency = 0;
   1325   // Visit each live out vreg def to find def/use pairs that cross iterations.
   1326   for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
   1327     unsigned Reg = P.RegUnit;
   1328     if (!TRI->isVirtualRegister(Reg))
   1329         continue;
   1330     const LiveInterval &LI = LIS->getInterval(Reg);
   1331     const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
   1332     if (!DefVNI)
   1333       continue;
   1334 
   1335     MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def);
   1336     const SUnit *DefSU = getSUnit(DefMI);
   1337     if (!DefSU)
   1338       continue;
   1339 
   1340     unsigned LiveOutHeight = DefSU->getHeight();
   1341     unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency;
   1342     // Visit all local users of the vreg def.
   1343     for (const VReg2SUnit &V2SU
   1344          : make_range(VRegUses.find(Reg), VRegUses.end())) {
   1345       SUnit *SU = V2SU.SU;
   1346       if (SU == &ExitSU)
   1347         continue;
   1348 
   1349       // Only consider uses of the phi.
   1350       LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(*SU->getInstr()));
   1351       if (!LRQ.valueIn()->isPHIDef())
   1352         continue;
   1353 
   1354       // Assume that a path spanning two iterations is a cycle, which could
   1355       // overestimate in strange cases. This allows cyclic latency to be
   1356       // estimated as the minimum slack of the vreg's depth or height.
   1357       unsigned CyclicLatency = 0;
   1358       if (LiveOutDepth > SU->getDepth())
   1359         CyclicLatency = LiveOutDepth - SU->getDepth();
   1360 
   1361       unsigned LiveInHeight = SU->getHeight() + DefSU->Latency;
   1362       if (LiveInHeight > LiveOutHeight) {
   1363         if (LiveInHeight - LiveOutHeight < CyclicLatency)
   1364           CyclicLatency = LiveInHeight - LiveOutHeight;
   1365       } else
   1366         CyclicLatency = 0;
   1367 
   1368       LLVM_DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU("
   1369                         << SU->NodeNum << ") = " << CyclicLatency << "c\n");
   1370       if (CyclicLatency > MaxCyclicLatency)
   1371         MaxCyclicLatency = CyclicLatency;
   1372     }
   1373   }
   1374   LLVM_DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n");
   1375   return MaxCyclicLatency;
   1376 }
   1377 
    1378 /// Release ExitSU predecessors and set up scheduler queues. Re-position
   1379 /// the Top RP tracker in case the region beginning has changed.
   1380 void ScheduleDAGMILive::initQueues(ArrayRef<SUnit*> TopRoots,
   1381                                    ArrayRef<SUnit*> BotRoots) {
   1382   ScheduleDAGMI::initQueues(TopRoots, BotRoots);
   1383   if (ShouldTrackPressure) {
   1384     assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
   1385     TopRPTracker.setPos(CurrentTop);
   1386   }
   1387 }
   1388 
   1389 /// Move an instruction and update register pressure.
   1390 void ScheduleDAGMILive::scheduleMI(SUnit *SU, bool IsTopNode) {
   1391   // Move the instruction to its new location in the instruction stream.
   1392   MachineInstr *MI = SU->getInstr();
   1393 
   1394   if (IsTopNode) {
   1395     assert(SU->isTopReady() && "node still has unscheduled dependencies");
   1396     if (&*CurrentTop == MI)
   1397       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
   1398     else {
   1399       moveInstruction(MI, CurrentTop);
   1400       TopRPTracker.setPos(MI);
   1401     }
   1402 
   1403     if (ShouldTrackPressure) {
   1404       // Update top scheduled pressure.
   1405       RegisterOperands RegOpers;
   1406       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
   1407       if (ShouldTrackLaneMasks) {
   1408         // Adjust liveness and add missing dead+read-undef flags.
   1409         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
   1410         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
   1411       } else {
   1412         // Adjust for missing dead-def flags.
   1413         RegOpers.detectDeadDefs(*MI, *LIS);
   1414       }
   1415 
   1416       TopRPTracker.advance(RegOpers);
   1417       assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
   1418       LLVM_DEBUG(dbgs() << "Top Pressure:\n"; dumpRegSetPressure(
   1419                      TopRPTracker.getRegSetPressureAtPos(), TRI););
   1420 
   1421       updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure);
   1422     }
   1423   } else {
   1424     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
   1425     MachineBasicBlock::iterator priorII =
   1426       priorNonDebug(CurrentBottom, CurrentTop);
   1427     if (&*priorII == MI)
   1428       CurrentBottom = priorII;
   1429     else {
   1430       if (&*CurrentTop == MI) {
   1431         CurrentTop = nextIfDebug(++CurrentTop, priorII);
   1432         TopRPTracker.setPos(CurrentTop);
   1433       }
   1434       moveInstruction(MI, CurrentBottom);
   1435       CurrentBottom = MI;
   1436       BotRPTracker.setPos(CurrentBottom);
   1437     }
   1438     if (ShouldTrackPressure) {
   1439       RegisterOperands RegOpers;
   1440       RegOpers.collect(*MI, *TRI, MRI, ShouldTrackLaneMasks, false);
   1441       if (ShouldTrackLaneMasks) {
   1442         // Adjust liveness and add missing dead+read-undef flags.
   1443         SlotIndex SlotIdx = LIS->getInstructionIndex(*MI).getRegSlot();
   1444         RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx, MI);
   1445       } else {
   1446         // Adjust for missing dead-def flags.
   1447         RegOpers.detectDeadDefs(*MI, *LIS);
   1448       }
   1449 
   1450       if (BotRPTracker.getPos() != CurrentBottom)
   1451         BotRPTracker.recedeSkipDebugValues();
   1452       SmallVector<RegisterMaskPair, 8> LiveUses;
   1453       BotRPTracker.recede(RegOpers, &LiveUses);
   1454       assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
   1455       LLVM_DEBUG(dbgs() << "Bottom Pressure:\n"; dumpRegSetPressure(
   1456                      BotRPTracker.getRegSetPressureAtPos(), TRI););
   1457 
   1458       updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure);
   1459       updatePressureDiffs(LiveUses);
   1460     }
   1461   }
   1462 }
   1463 
   1464 //===----------------------------------------------------------------------===//
   1465 // BaseMemOpClusterMutation - DAG post-processing to cluster loads or stores.
   1466 //===----------------------------------------------------------------------===//
   1467 
   1468 namespace {
   1469 
   1470 /// Post-process the DAG to create cluster edges between neighboring
   1471 /// loads or between neighboring stores.
   1472 class BaseMemOpClusterMutation : public ScheduleDAGMutation {
   1473   struct MemOpInfo {
   1474     SUnit *SU;
   1475     unsigned BaseReg;
   1476     int64_t Offset;
   1477 
   1478     MemOpInfo(SUnit *su, unsigned reg, int64_t ofs)
   1479         : SU(su), BaseReg(reg), Offset(ofs) {}
   1480 
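             // Sort by base register, then by offset, so memory operations on
             // the same base end up adjacent; NodeNum breaks ties to keep the
             // order deterministic.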
    1481     bool operator<(const MemOpInfo &RHS) const {
   1482       return std::tie(BaseReg, Offset, SU->NodeNum) <
   1483              std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
   1484     }
   1485   };
   1486 
   1487   const TargetInstrInfo *TII;
   1488   const TargetRegisterInfo *TRI;
   1489   bool IsLoad;
   1490 
   1491 public:
   1492   BaseMemOpClusterMutation(const TargetInstrInfo *tii,
   1493                            const TargetRegisterInfo *tri, bool IsLoad)
   1494       : TII(tii), TRI(tri), IsLoad(IsLoad) {}
   1495 
   1496   void apply(ScheduleDAGInstrs *DAGInstrs) override;
   1497 
   1498 protected:
   1499   void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
   1500 };
   1501 
   1502 class StoreClusterMutation : public BaseMemOpClusterMutation {
   1503 public:
   1504   StoreClusterMutation(const TargetInstrInfo *tii,
   1505                        const TargetRegisterInfo *tri)
   1506       : BaseMemOpClusterMutation(tii, tri, false) {}
   1507 };
   1508 
   1509 class LoadClusterMutation : public BaseMemOpClusterMutation {
   1510 public:
   1511   LoadClusterMutation(const TargetInstrInfo *tii, const TargetRegisterInfo *tri)
   1512       : BaseMemOpClusterMutation(tii, tri, true) {}
   1513 };
   1514 
   1515 } // end anonymous namespace
   1516 
   1517 namespace llvm {
   1518 
   1519 std::unique_ptr<ScheduleDAGMutation>
   1520 createLoadClusterDAGMutation(const TargetInstrInfo *TII,
   1521                              const TargetRegisterInfo *TRI) {
   1522   return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
   1523                             : nullptr;
   1524 }
   1525 
   1526 std::unique_ptr<ScheduleDAGMutation>
   1527 createStoreClusterDAGMutation(const TargetInstrInfo *TII,
   1528                               const TargetRegisterInfo *TRI) {
   1529   return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
   1530                             : nullptr;
   1531 }
   1532 
   1533 } // end namespace llvm
   1534 
   1535 void BaseMemOpClusterMutation::clusterNeighboringMemOps(
   1536     ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
   1537   SmallVector<MemOpInfo, 32> MemOpRecords;
   1538   for (SUnit *SU : MemOps) {
   1539     unsigned BaseReg;
   1540     int64_t Offset;
   1541     if (TII->getMemOpBaseRegImmOfs(*SU->getInstr(), BaseReg, Offset, TRI))
   1542       MemOpRecords.push_back(MemOpInfo(SU, BaseReg, Offset));
   1543   }
   1544   if (MemOpRecords.size() < 2)
   1545     return;
   1546 
   1547   llvm::sort(MemOpRecords.begin(), MemOpRecords.end());
   1548   unsigned ClusterLength = 1;
   1549   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
   1550     SUnit *SUa = MemOpRecords[Idx].SU;
   1551     SUnit *SUb = MemOpRecords[Idx+1].SU;
   1552     if (TII->shouldClusterMemOps(*SUa->getInstr(), MemOpRecords[Idx].BaseReg,
   1553                                  *SUb->getInstr(), MemOpRecords[Idx+1].BaseReg,
   1554                                  ClusterLength) &&
   1555         DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
   1556       LLVM_DEBUG(dbgs() << "Cluster ld/st SU(" << SUa->NodeNum << ") - SU("
   1557                         << SUb->NodeNum << ")\n");
   1558       // Copy successor edges from SUa to SUb. Interleaving computation
   1559       // dependent on SUa can prevent load combining due to register reuse.
   1560       // Predecessor edges do not need to be copied from SUb to SUa since nearby
   1561       // loads should have effectively the same inputs.
   1562       for (const SDep &Succ : SUa->Succs) {
   1563         if (Succ.getSUnit() == SUb)
   1564           continue;
   1565         LLVM_DEBUG(dbgs() << "  Copy Succ SU(" << Succ.getSUnit()->NodeNum
   1566                           << ")\n");
   1567         DAG->addEdge(Succ.getSUnit(), SDep(SUb, SDep::Artificial));
   1568       }
   1569       ++ClusterLength;
   1570     } else
   1571       ClusterLength = 1;
   1572   }
   1573 }
   1574 
    1575 /// Callback from DAG postProcessing to create cluster edges for loads or stores.
   1576 void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
   1577   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
   1578 
   1579   // Map DAG NodeNum to store chain ID.
   1580   DenseMap<unsigned, unsigned> StoreChainIDs;
   1581   // Map each store chain to a set of dependent MemOps.
   1582   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
   1583   for (SUnit &SU : DAG->SUnits) {
   1584     if ((IsLoad && !SU.getInstr()->mayLoad()) ||
   1585         (!IsLoad && !SU.getInstr()->mayStore()))
   1586       continue;
   1587 
   1588     unsigned ChainPredID = DAG->SUnits.size();
   1589     for (const SDep &Pred : SU.Preds) {
   1590       if (Pred.isCtrl()) {
   1591         ChainPredID = Pred.getSUnit()->NodeNum;
   1592         break;
   1593       }
   1594     }
   1595     // Check if this chain-like pred has been seen
   1596     // before. ChainPredID==MaxNodeID at the top of the schedule.
   1597     unsigned NumChains = StoreChainDependents.size();
   1598     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
   1599       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
   1600     if (Result.second)
   1601       StoreChainDependents.resize(NumChains + 1);
   1602     StoreChainDependents[Result.first->second].push_back(&SU);
   1603   }
   1604 
   1605   // Iterate over the store chains.
   1606   for (auto &SCD : StoreChainDependents)
   1607     clusterNeighboringMemOps(SCD, DAG);
   1608 }
   1609 
   1610 //===----------------------------------------------------------------------===//
   1611 // CopyConstrain - DAG post-processing to encourage copy elimination.
   1612 //===----------------------------------------------------------------------===//
   1613 
   1614 namespace {
   1615 
   1616 /// Post-process the DAG to create weak edges from all uses of a copy to
   1617 /// the one use that defines the copy's source vreg, most likely an induction
   1618 /// variable increment.
   1619 class CopyConstrain : public ScheduleDAGMutation {
   1620   // Transient state.
   1621   SlotIndex RegionBeginIdx;
   1622 
   1623   // RegionEndIdx is the slot index of the last non-debug instruction in the
   1624   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
   1625   SlotIndex RegionEndIdx;
   1626 
   1627 public:
   1628   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
   1629 
   1630   void apply(ScheduleDAGInstrs *DAGInstrs) override;
   1631 
   1632 protected:
   1633   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG);
   1634 };
   1635 
   1636 } // end anonymous namespace
   1637 
   1638 namespace llvm {
   1639 
   1640 std::unique_ptr<ScheduleDAGMutation>
   1641 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
   1642                                const TargetRegisterInfo *TRI) {
   1643   return llvm::make_unique<CopyConstrain>(TII, TRI);
   1644 }
   1645 
   1646 } // end namespace llvm
   1647 
   1648 /// constrainLocalCopy handles two possibilities:
   1649 /// 1) Local src:
   1650 /// I0:     = dst
   1651 /// I1: src = ...
   1652 /// I2:     = dst
   1653 /// I3: dst = src (copy)
   1654 /// (create pred->succ edges I0->I1, I2->I1)
   1655 ///
   1656 /// 2) Local copy:
   1657 /// I0: dst = src (copy)
   1658 /// I1:     = dst
   1659 /// I2: src = ...
   1660 /// I3:     = dst
   1661 /// (create pred->succ edges I1->I2, I3->I2)
   1662 ///
   1663 /// Although the MachineScheduler is currently constrained to single blocks,
   1664 /// this algorithm should handle extended blocks. An EBB is a set of
   1665 /// contiguously numbered blocks such that the previous block in the EBB is
   1666 /// always the single predecessor.
   1667 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
   1668   LiveIntervals *LIS = DAG->getLIS();
   1669   MachineInstr *Copy = CopySU->getInstr();
   1670 
   1671   // Check for pure vreg copies.
   1672   const MachineOperand &SrcOp = Copy->getOperand(1);
   1673   unsigned SrcReg = SrcOp.getReg();
   1674   if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
   1675     return;
   1676 
   1677   const MachineOperand &DstOp = Copy->getOperand(0);
   1678   unsigned DstReg = DstOp.getReg();
   1679   if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
   1680     return;
   1681 
   1682   // Check if either the dest or source is local. If it's live across a back
   1683   // edge, it's not local. Note that if both vregs are live across the back
    1684   // edge, we cannot successfully constrain the copy without cyclic scheduling.
   1685   // If both the copy's source and dest are local live intervals, then we
   1686   // should treat the dest as the global for the purpose of adding
   1687   // constraints. This adds edges from source's other uses to the copy.
   1688   unsigned LocalReg = SrcReg;
   1689   unsigned GlobalReg = DstReg;
   1690   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
   1691   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
   1692     LocalReg = DstReg;
   1693     GlobalReg = SrcReg;
   1694     LocalLI = &LIS->getInterval(LocalReg);
   1695     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
   1696       return;
   1697   }
   1698   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
   1699 
   1700   // Find the global segment after the start of the local LI.
   1701   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
   1702   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
   1703   // local live range. We could create edges from other global uses to the local
   1704   // start, but the coalescer should have already eliminated these cases, so
   1705   // don't bother dealing with it.
   1706   if (GlobalSegment == GlobalLI->end())
   1707     return;
   1708 
   1709   // If GlobalSegment is killed at the LocalLI->start, the call to find()
   1710   // returned the next global segment. But if GlobalSegment overlaps with
   1711   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
   1712   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
   1713   if (GlobalSegment->contains(LocalLI->beginIndex()))
   1714     ++GlobalSegment;
   1715 
   1716   if (GlobalSegment == GlobalLI->end())
   1717     return;
   1718 
   1719   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
   1720   if (GlobalSegment != GlobalLI->begin()) {
   1721     // Two address defs have no hole.
   1722     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->end,
   1723                                GlobalSegment->start)) {
   1724       return;
   1725     }
   1726     // If the prior global segment may be defined by the same two-address
    1727     // instruction that also defines LocalLI, then we cannot make a hole here.
   1728     if (SlotIndex::isSameInstr(std::prev(GlobalSegment)->start,
   1729                                LocalLI->beginIndex())) {
   1730       return;
   1731     }
   1732     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
   1733     // it would be a disconnected component in the live range.
   1734     assert(std::prev(GlobalSegment)->start < LocalLI->beginIndex() &&
   1735            "Disconnected LRG within the scheduling region.");
   1736   }
   1737   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
   1738   if (!GlobalDef)
   1739     return;
   1740 
   1741   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
   1742   if (!GlobalSU)
   1743     return;
   1744 
   1745   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
   1746   // constraining the uses of the last local def to precede GlobalDef.
   1747   SmallVector<SUnit*,8> LocalUses;
   1748   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
   1749   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
   1750   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
   1751   for (const SDep &Succ : LastLocalSU->Succs) {
   1752     if (Succ.getKind() != SDep::Data || Succ.getReg() != LocalReg)
   1753       continue;
   1754     if (Succ.getSUnit() == GlobalSU)
   1755       continue;
   1756     if (!DAG->canAddEdge(GlobalSU, Succ.getSUnit()))
   1757       return;
   1758     LocalUses.push_back(Succ.getSUnit());
   1759   }
   1760   // Open the top of the GlobalLI hole by constraining any earlier global uses
   1761   // to precede the start of LocalLI.
   1762   SmallVector<SUnit*,8> GlobalUses;
   1763   MachineInstr *FirstLocalDef =
   1764     LIS->getInstructionFromIndex(LocalLI->beginIndex());
   1765   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
   1766   for (const SDep &Pred : GlobalSU->Preds) {
   1767     if (Pred.getKind() != SDep::Anti || Pred.getReg() != GlobalReg)
   1768       continue;
   1769     if (Pred.getSUnit() == FirstLocalSU)
   1770       continue;
   1771     if (!DAG->canAddEdge(FirstLocalSU, Pred.getSUnit()))
   1772       return;
   1773     GlobalUses.push_back(Pred.getSUnit());
   1774   }
   1775   LLVM_DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
   1776   // Add the weak edges.
   1777   for (SmallVectorImpl<SUnit*>::const_iterator
   1778          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
   1779     LLVM_DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
   1780                       << GlobalSU->NodeNum << ")\n");
   1781     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
   1782   }
   1783   for (SmallVectorImpl<SUnit*>::const_iterator
   1784          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
   1785     LLVM_DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
   1786                       << FirstLocalSU->NodeNum << ")\n");
   1787     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
   1788   }
   1789 }
   1790 
   1791 /// Callback from DAG postProcessing to create weak edges to encourage
   1792 /// copy elimination.
   1793 void CopyConstrain::apply(ScheduleDAGInstrs *DAGInstrs) {
   1794   ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
   1795   assert(DAG->hasVRegLiveness() && "Expect VRegs with LiveIntervals");
   1796 
   1797   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
   1798   if (FirstPos == DAG->end())
   1799     return;
   1800   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(*FirstPos);
   1801   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
   1802       *priorNonDebug(DAG->end(), DAG->begin()));
   1803 
   1804   for (SUnit &SU : DAG->SUnits) {
   1805     if (!SU.getInstr()->isCopy())
   1806       continue;
   1807 
   1808     constrainLocalCopy(&SU, static_cast<ScheduleDAGMILive*>(DAG));
   1809   }
   1810 }
   1811 
   1812 //===----------------------------------------------------------------------===//
   1813 // MachineSchedStrategy helpers used by GenericScheduler, GenericPostScheduler
   1814 // and possibly other custom schedulers.
   1815 //===----------------------------------------------------------------------===//
   1816 
   1817 static const unsigned InvalidCycle = ~0U;
   1818 
   1819 SchedBoundary::~SchedBoundary() { delete HazardRec; }
   1820 
   1821 /// Given a Count of resource usage and a Latency value, return true if a
   1822 /// SchedBoundary becomes resource limited.
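         /// For example (an illustrative sketch, values assumed): with
         /// LFactor = 2, Latency = 3 and Count = 10, the zone has
         /// 10 - 3*2 = 4 scaled units beyond what latency alone explains,
         /// which is more than one cycle's worth (LFactor), so it is
         /// considered resource limited.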
   1823 static bool checkResourceLimit(unsigned LFactor, unsigned Count,
   1824                                unsigned Latency) {
   1825   return (int)(Count - (Latency * LFactor)) > (int)LFactor;
   1826 }
   1827 
   1828 void SchedBoundary::reset() {
   1829   // A new HazardRec is created for each DAG and owned by SchedBoundary.
   1830   // Destroying and reconstructing it is very expensive though. So keep
   1831   // invalid, placeholder HazardRecs.
   1832   if (HazardRec && HazardRec->isEnabled()) {
   1833     delete HazardRec;
   1834     HazardRec = nullptr;
   1835   }
   1836   Available.clear();
   1837   Pending.clear();
   1838   CheckPending = false;
   1839   CurrCycle = 0;
   1840   CurrMOps = 0;
   1841   MinReadyCycle = std::numeric_limits<unsigned>::max();
   1842   ExpectedLatency = 0;
   1843   DependentLatency = 0;
   1844   RetiredMOps = 0;
   1845   MaxExecutedResCount = 0;
   1846   ZoneCritResIdx = 0;
   1847   IsResourceLimited = false;
   1848   ReservedCycles.clear();
   1849 #ifndef NDEBUG
   1850   // Track the maximum number of stall cycles that could arise either from the
   1851   // latency of a DAG edge or the number of cycles that a processor resource is
   1852   // reserved (SchedBoundary::ReservedCycles).
   1853   MaxObservedStall = 0;
   1854 #endif
   1855   // Reserve a zero-count for invalid CritResIdx.
   1856   ExecutedResCounts.resize(1);
   1857   assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
   1858 }
   1859 
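         // Initialize the region's remaining-work bookkeeping: sum the micro-ops
         // and per-resource cycles of every SUnit, scaled by the model's
         // resource factors so counts for different resources are comparable.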
   1860 void SchedRemainder::
   1861 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
   1862   reset();
   1863   if (!SchedModel->hasInstrSchedModel())
   1864     return;
   1865   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
   1866   for (SUnit &SU : DAG->SUnits) {
   1867     const MCSchedClassDesc *SC = DAG->getSchedClass(&SU);
   1868     RemIssueCount += SchedModel->getNumMicroOps(SU.getInstr(), SC)
   1869       * SchedModel->getMicroOpFactor();
   1870     for (TargetSchedModel::ProcResIter
   1871            PI = SchedModel->getWriteProcResBegin(SC),
   1872            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
   1873       unsigned PIdx = PI->ProcResourceIdx;
   1874       unsigned Factor = SchedModel->getResourceFactor(PIdx);
   1875       RemainingCounts[PIdx] += (Factor * PI->Cycles);
   1876     }
   1877   }
   1878 }
   1879 
   1880 void SchedBoundary::
   1881 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
   1882   reset();
   1883   DAG = dag;
   1884   SchedModel = smodel;
   1885   Rem = rem;
   1886   if (SchedModel->hasInstrSchedModel()) {
   1887     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
   1888     ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
   1889   }
   1890 }
   1891 
   1892 /// Compute the stall cycles based on this SUnit's ready time. Heuristics treat
   1893 /// these "soft stalls" differently than the hard stall cycles based on CPU
   1894 /// resources and computed by checkHazard(). A fully in-order model
   1895 /// (MicroOpBufferSize==0) will not make use of this since instructions are not
   1896 /// available for scheduling until they are ready. However, a weaker in-order
   1897 /// model may use this for heuristics. For example, if a processor has in-order
   1898 /// behavior when reading certain resources, this may come into play.
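         ///
         /// For example (an illustrative sketch, values assumed): if an
         /// unbuffered SU becomes ready at cycle 6 while the zone is only at
         /// cycle 4, the heuristics see a 2-cycle soft stall.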
   1899 unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
   1900   if (!SU->isUnbuffered)
   1901     return 0;
   1902 
   1903   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
   1904   if (ReadyCycle > CurrCycle)
   1905     return ReadyCycle - CurrCycle;
   1906   return 0;
   1907 }
   1908 
   1909 /// Compute the next cycle at which the given processor resource can be
   1910 /// scheduled.
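         /// For example (an illustrative sketch, values assumed): if the
         /// resource was last reserved until cycle 7 and this operation needs
         /// it for 2 cycles, a top-down zone may issue at cycle 7, while a
         /// bottom-up zone must wait until cycle 9.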
   1911 unsigned SchedBoundary::
   1912 getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
   1913   unsigned NextUnreserved = ReservedCycles[PIdx];
   1914   // If this resource has never been used, always return cycle zero.
   1915   if (NextUnreserved == InvalidCycle)
   1916     return 0;
   1917   // For bottom-up scheduling add the cycles needed for the current operation.
   1918   if (!isTop())
   1919     NextUnreserved += Cycles;
   1920   return NextUnreserved;
   1921 }
   1922 
   1923 /// Does this SU have a hazard within the current instruction group.
   1924 ///
   1925 /// The scheduler supports two modes of hazard recognition. The first is the
   1926 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
   1927 /// supports highly complicated in-order reservation tables
   1928 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
   1929 ///
   1930 /// The second is a streamlined mechanism that checks for hazards based on
   1931 /// simple counters that the scheduler itself maintains. It explicitly checks
   1932 /// for instruction dispatch limitations, including the number of micro-ops that
   1933 /// can dispatch per cycle.
   1934 ///
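         /// For example (an illustrative sketch, values assumed): with an issue
         /// width of 4, CurrMOps == 3 and an SU that decodes to 2 micro-ops,
         /// 3 + 2 > 4, so the SU is treated as a hazard in the current cycle.
         ///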
   1935 /// TODO: Also check whether the SU must start a new group.
   1936 bool SchedBoundary::checkHazard(SUnit *SU) {
   1937   if (HazardRec->isEnabled()
   1938       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
   1939     return true;
   1940   }
   1941 
   1942   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
   1943   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
   1944     LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
   1945                       << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
   1946     return true;
   1947   }
   1948 
   1949   if (CurrMOps > 0 &&
   1950       ((isTop() && SchedModel->mustBeginGroup(SU->getInstr())) ||
   1951        (!isTop() && SchedModel->mustEndGroup(SU->getInstr())))) {
   1952     LLVM_DEBUG(dbgs() << "  hazard: SU(" << SU->NodeNum << ") must "
   1953                       << (isTop() ? "begin" : "end") << " group\n");
   1954     return true;
   1955   }
   1956 
   1957   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
   1958     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   1959     for (const MCWriteProcResEntry &PE :
   1960           make_range(SchedModel->getWriteProcResBegin(SC),
   1961                      SchedModel->getWriteProcResEnd(SC))) {
   1962       unsigned ResIdx = PE.ProcResourceIdx;
   1963       unsigned Cycles = PE.Cycles;
   1964       unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
   1965       if (NRCycle > CurrCycle) {
   1966 #ifndef NDEBUG
   1967         MaxObservedStall = std::max(Cycles, MaxObservedStall);
   1968 #endif
   1969         LLVM_DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") "
   1970                           << SchedModel->getResourceName(ResIdx) << "="
   1971                           << NRCycle << "c\n");
   1972         return true;
   1973       }
   1974     }
   1975   }
   1976   return false;
   1977 }
   1978 
   1979 // Find the unscheduled node in ReadySUs with the highest latency.
   1980 unsigned SchedBoundary::
   1981 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
   1982   SUnit *LateSU = nullptr;
   1983   unsigned RemLatency = 0;
   1984   for (SUnit *SU : ReadySUs) {
   1985     unsigned L = getUnscheduledLatency(SU);
   1986     if (L > RemLatency) {
   1987       RemLatency = L;
   1988       LateSU = SU;
   1989     }
   1990   }
   1991   if (LateSU) {
   1992     LLVM_DEBUG(dbgs() << Available.getName() << " RemLatency SU("
   1993                       << LateSU->NodeNum << ") " << RemLatency << "c\n");
   1994   }
   1995   return RemLatency;
   1996 }
   1997 
   1998 // Count resources in this zone and the remaining unscheduled
    1999 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
   2000 // resource index, or zero if the zone is issue limited.
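         // For example (an illustrative sketch, values assumed): if this zone
         // has already executed 8 scaled units of some resource and 12 scaled
         // units of it remain in the region, its combined count is 20; the
         // resource (or the micro-op count) with the largest combined total
         // becomes OtherCritIdx.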
   2001 unsigned SchedBoundary::
   2002 getOtherResourceCount(unsigned &OtherCritIdx) {
   2003   OtherCritIdx = 0;
   2004   if (!SchedModel->hasInstrSchedModel())
   2005     return 0;
   2006 
   2007   unsigned OtherCritCount = Rem->RemIssueCount
   2008     + (RetiredMOps * SchedModel->getMicroOpFactor());
   2009   LLVM_DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
   2010                     << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
   2011   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
   2012        PIdx != PEnd; ++PIdx) {
   2013     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
   2014     if (OtherCount > OtherCritCount) {
   2015       OtherCritCount = OtherCount;
   2016       OtherCritIdx = PIdx;
   2017     }
   2018   }
   2019   if (OtherCritIdx) {
   2020     LLVM_DEBUG(
   2021         dbgs() << "  " << Available.getName() << " + Remain CritRes: "
   2022                << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
   2023                << " " << SchedModel->getResourceName(OtherCritIdx) << "\n");
   2024   }
   2025   return OtherCritCount;
   2026 }
   2027 
   2028 void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
   2029   assert(SU->getInstr() && "Scheduled SUnit must have instr");
   2030 
   2031 #ifndef NDEBUG
    2032   // ReadyCycle has been bumped up to the CurrCycle when this node was
   2033   // scheduled, but CurrCycle may have been eagerly advanced immediately after
   2034   // scheduling, so may now be greater than ReadyCycle.
   2035   if (ReadyCycle > CurrCycle)
   2036     MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall);
   2037 #endif
   2038 
   2039   if (ReadyCycle < MinReadyCycle)
   2040     MinReadyCycle = ReadyCycle;
   2041 
   2042   // Check for interlocks first. For the purpose of other heuristics, an
   2043   // instruction that cannot issue appears as if it's not in the ReadyQueue.
   2044   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
   2045   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU) ||
   2046       Available.size() >= ReadyListLimit)
   2047     Pending.push(SU);
   2048   else
   2049     Available.push(SU);
   2050 }
   2051 
   2052 /// Move the boundary of scheduled code by one cycle.
   2053 void SchedBoundary::bumpCycle(unsigned NextCycle) {
   2054   if (SchedModel->getMicroOpBufferSize() == 0) {
   2055     assert(MinReadyCycle < std::numeric_limits<unsigned>::max() &&
   2056            "MinReadyCycle uninitialized");
   2057     if (MinReadyCycle > NextCycle)
   2058       NextCycle = MinReadyCycle;
   2059   }
   2060   // Update the current micro-ops, which will issue in the next cycle.
   2061   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
   2062   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
   2063 
   2064   // Decrement DependentLatency based on the next cycle.
   2065   if ((NextCycle - CurrCycle) > DependentLatency)
   2066     DependentLatency = 0;
   2067   else
   2068     DependentLatency -= (NextCycle - CurrCycle);
   2069 
   2070   if (!HazardRec->isEnabled()) {
   2071     // Bypass HazardRec virtual calls.
   2072     CurrCycle = NextCycle;
   2073   } else {
   2074     // Bypass getHazardType calls in case of long latency.
   2075     for (; CurrCycle != NextCycle; ++CurrCycle) {
   2076       if (isTop())
   2077         HazardRec->AdvanceCycle();
   2078       else
   2079         HazardRec->RecedeCycle();
   2080     }
   2081   }
   2082   CheckPending = true;
   2083   IsResourceLimited =
   2084       checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
   2085                          getScheduledLatency());
   2086 
   2087   LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
   2088                     << '\n');
   2089 }
   2090 
   2091 void SchedBoundary::incExecutedResources(unsigned PIdx, unsigned Count) {
   2092   ExecutedResCounts[PIdx] += Count;
   2093   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
   2094     MaxExecutedResCount = ExecutedResCounts[PIdx];
   2095 }
   2096 
   2097 /// Add the given processor resource to this scheduled zone.
   2098 ///
   2099 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
   2100 /// during which this resource is consumed.
   2101 ///
   2102 /// \return the next cycle at which the instruction may execute without
   2103 /// oversubscribing resources.
   2104 unsigned SchedBoundary::
   2105 countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
   2106   unsigned Factor = SchedModel->getResourceFactor(PIdx);
   2107   unsigned Count = Factor * Cycles;
   2108   LLVM_DEBUG(dbgs() << "  " << SchedModel->getResourceName(PIdx) << " +"
   2109                     << Cycles << "x" << Factor << "u\n");
   2110 
   2111   // Update Executed resources counts.
   2112   incExecutedResources(PIdx, Count);
   2113   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
   2114   Rem->RemainingCounts[PIdx] -= Count;
   2115 
   2116   // Check if this resource exceeds the current critical resource. If so, it
   2117   // becomes the critical resource.
   2118   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
   2119     ZoneCritResIdx = PIdx;
   2120     LLVM_DEBUG(dbgs() << "  *** Critical resource "
   2121                       << SchedModel->getResourceName(PIdx) << ": "
   2122                       << getResourceCount(PIdx) / SchedModel->getLatencyFactor()
   2123                       << "c\n");
   2124   }
   2125   // For reserved resources, record the highest cycle using the resource.
   2126   unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
   2127   if (NextAvailable > CurrCycle) {
   2128     LLVM_DEBUG(dbgs() << "  Resource conflict: "
   2129                       << SchedModel->getProcResource(PIdx)->Name
   2130                       << " reserved until @" << NextAvailable << "\n");
   2131   }
   2132   return NextAvailable;
   2133 }
   2134 
   2135 /// Move the boundary of scheduled code by one SUnit.
   2136 void SchedBoundary::bumpNode(SUnit *SU) {
   2137   // Update the reservation table.
   2138   if (HazardRec->isEnabled()) {
   2139     if (!isTop() && SU->isCall) {
   2140       // Calls are scheduled with their preceding instructions. For bottom-up
   2141       // scheduling, clear the pipeline state before emitting.
   2142       HazardRec->Reset();
   2143     }
   2144     HazardRec->EmitInstruction(SU);
   2145   }
   2146   // checkHazard should prevent scheduling multiple instructions per cycle that
   2147   // exceed the issue width.
   2148   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   2149   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
   2150   assert(
   2151       (CurrMOps == 0 || (CurrMOps + IncMOps) <= SchedModel->getIssueWidth()) &&
   2152       "Cannot schedule this instruction's MicroOps in the current cycle.");
   2153 
   2154   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
   2155   LLVM_DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
   2156 
   2157   unsigned NextCycle = CurrCycle;
   2158   switch (SchedModel->getMicroOpBufferSize()) {
   2159   case 0:
   2160     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
   2161     break;
   2162   case 1:
   2163     if (ReadyCycle > NextCycle) {
   2164       NextCycle = ReadyCycle;
   2165       LLVM_DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
   2166     }
   2167     break;
   2168   default:
   2169     // We don't currently model the OOO reorder buffer, so consider all
   2170     // scheduled MOps to be "retired". We do loosely model in-order resource
   2171     // latency. If this instruction uses an in-order resource, account for any
   2172     // likely stall cycles.
   2173     if (SU->isUnbuffered && ReadyCycle > NextCycle)
   2174       NextCycle = ReadyCycle;
   2175     break;
   2176   }
   2177   RetiredMOps += IncMOps;
   2178 
   2179   // Update resource counts and critical resource.
   2180   if (SchedModel->hasInstrSchedModel()) {
   2181     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
   2182     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
   2183     Rem->RemIssueCount -= DecRemIssue;
   2184     if (ZoneCritResIdx) {
   2185       // Scale scheduled micro-ops for comparing with the critical resource.
   2186       unsigned ScaledMOps =
   2187         RetiredMOps * SchedModel->getMicroOpFactor();
   2188 
   2189       // If scaled micro-ops are now more than the previous critical resource by
   2190       // a full cycle, then micro-ops issue becomes critical.
   2191       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
   2192           >= (int)SchedModel->getLatencyFactor()) {
   2193         ZoneCritResIdx = 0;
   2194         LLVM_DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
   2195                           << ScaledMOps / SchedModel->getLatencyFactor()
   2196                           << "c\n");
   2197       }
   2198     }
   2199     for (TargetSchedModel::ProcResIter
   2200            PI = SchedModel->getWriteProcResBegin(SC),
   2201            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
   2202       unsigned RCycle =
   2203         countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle);
   2204       if (RCycle > NextCycle)
   2205         NextCycle = RCycle;
   2206     }
   2207     if (SU->hasReservedResource) {
   2208       // For reserved resources, record the highest cycle using the resource.
   2209       // For top-down scheduling, this is the cycle in which we schedule this
    2210       // instruction plus the number of cycles the operation reserves the
    2211       // resource. For bottom-up it is simply the instruction's cycle.
   2212       for (TargetSchedModel::ProcResIter
   2213              PI = SchedModel->getWriteProcResBegin(SC),
   2214              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
   2215         unsigned PIdx = PI->ProcResourceIdx;
   2216         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
   2217           if (isTop()) {
   2218             ReservedCycles[PIdx] =
   2219               std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
   2220           }
   2221           else
   2222             ReservedCycles[PIdx] = NextCycle;
   2223         }
   2224       }
   2225     }
   2226   }
   2227   // Update ExpectedLatency and DependentLatency.
   2228   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
   2229   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
   2230   if (SU->getDepth() > TopLatency) {
   2231     TopLatency = SU->getDepth();
   2232     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " TopLatency SU("
   2233                       << SU->NodeNum << ") " << TopLatency << "c\n");
   2234   }
   2235   if (SU->getHeight() > BotLatency) {
   2236     BotLatency = SU->getHeight();
   2237     LLVM_DEBUG(dbgs() << "  " << Available.getName() << " BotLatency SU("
   2238                       << SU->NodeNum << ") " << BotLatency << "c\n");
   2239   }
   2240   // If we stall for any reason, bump the cycle.
   2241   if (NextCycle > CurrCycle)
   2242     bumpCycle(NextCycle);
   2243   else
   2244     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
   2245     // resource limited. If a stall occurred, bumpCycle does this.
   2246     IsResourceLimited =
   2247         checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
   2248                            getScheduledLatency());
   2249 
   2250   // Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
    2251   // resets CurrMOps. Loop to handle instructions with more MOps than can issue in
    2252   // one cycle. Since we commonly reach the max MOps here, opportunistically
   2253   // bump the cycle to avoid uselessly checking everything in the readyQ.
   2254   CurrMOps += IncMOps;
   2255 
   2256   // Bump the cycle count for issue group constraints.
    2257   // This must be done after NextCycle has been adjusted for all other stalls.
   2258   // Calling bumpCycle(X) will reduce CurrMOps by one issue group and set
   2259   // currCycle to X.
   2260   if ((isTop() &&  SchedModel->mustEndGroup(SU->getInstr())) ||
   2261       (!isTop() && SchedModel->mustBeginGroup(SU->getInstr()))) {
   2262     LLVM_DEBUG(dbgs() << "  Bump cycle to " << (isTop() ? "end" : "begin")
   2263                       << " group\n");
   2264     bumpCycle(++NextCycle);
   2265   }
   2266 
   2267   while (CurrMOps >= SchedModel->getIssueWidth()) {
   2268     LLVM_DEBUG(dbgs() << "  *** Max MOps " << CurrMOps << " at cycle "
   2269                       << CurrCycle << '\n');
   2270     bumpCycle(++NextCycle);
   2271   }
   2272   LLVM_DEBUG(dumpScheduledState());
   2273 }
   2274 
    2275 /// Release pending ready nodes into the available queue. This makes them
   2276 /// visible to heuristics.
   2277 void SchedBoundary::releasePending() {
   2278   // If the available queue is empty, it is safe to reset MinReadyCycle.
   2279   if (Available.empty())
   2280     MinReadyCycle = std::numeric_limits<unsigned>::max();
   2281 
   2282   // Check to see if any of the pending instructions are ready to issue.  If
   2283   // so, add them to the available queue.
   2284   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
   2285   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
   2286     SUnit *SU = *(Pending.begin()+i);
   2287     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
   2288 
   2289     if (ReadyCycle < MinReadyCycle)
   2290       MinReadyCycle = ReadyCycle;
   2291 
   2292     if (!IsBuffered && ReadyCycle > CurrCycle)
   2293       continue;
   2294 
   2295     if (checkHazard(SU))
   2296       continue;
   2297 
   2298     if (Available.size() >= ReadyListLimit)
   2299       break;
   2300 
   2301     Available.push(SU);
   2302     Pending.remove(Pending.begin()+i);
   2303     --i; --e;
   2304   }
   2305   CheckPending = false;
   2306 }
   2307 
   2308 /// Remove SU from the ready set for this boundary.
   2309 void SchedBoundary::removeReady(SUnit *SU) {
   2310   if (Available.isInQueue(SU))
   2311     Available.remove(Available.find(SU));
   2312   else {
   2313     assert(Pending.isInQueue(SU) && "bad ready count");
   2314     Pending.remove(Pending.find(SU));
   2315   }
   2316 }
   2317 
   2318 /// If this queue only has one ready candidate, return it. As a side effect,
   2319 /// defer any nodes that now hit a hazard, and advance the cycle until at least
   2320 /// one node is ready. If multiple instructions are ready, return NULL.
   2321 SUnit *SchedBoundary::pickOnlyChoice() {
   2322   if (CheckPending)
   2323     releasePending();
   2324 
   2325   if (CurrMOps > 0) {
   2326     // Defer any ready instrs that now have a hazard.
   2327     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
   2328       if (checkHazard(*I)) {
   2329         Pending.push(*I);
   2330         I = Available.remove(I);
   2331         continue;
   2332       }
   2333       ++I;
   2334     }
   2335   }
   2336   for (unsigned i = 0; Available.empty(); ++i) {
   2337 //  FIXME: Re-enable assert once PR20057 is resolved.
   2338 //    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) &&
   2339 //           "permanent hazard");
   2340     (void)i;
   2341     bumpCycle(CurrCycle + 1);
   2342     releasePending();
   2343   }
   2344 
   2345   LLVM_DEBUG(Pending.dump());
   2346   LLVM_DEBUG(Available.dump());
   2347 
   2348   if (Available.size() == 1)
   2349     return *Available.begin();
   2350   return nullptr;
   2351 }
   2352 
   2353 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   2354 // This is useful information to dump after bumpNode.
   2355 // Note that the Queue contents are more useful before pickNodeFromQueue.
   2356 LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const {
   2357   unsigned ResFactor;
   2358   unsigned ResCount;
   2359   if (ZoneCritResIdx) {
   2360     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
   2361     ResCount = getResourceCount(ZoneCritResIdx);
   2362   } else {
   2363     ResFactor = SchedModel->getMicroOpFactor();
   2364     ResCount = RetiredMOps * ResFactor;
   2365   }
   2366   unsigned LFactor = SchedModel->getLatencyFactor();
   2367   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
   2368          << "  Retired: " << RetiredMOps;
   2369   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
   2370   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
   2371          << ResCount / ResFactor << " "
   2372          << SchedModel->getResourceName(ZoneCritResIdx)
   2373          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
   2374          << (IsResourceLimited ? "  - Resource" : "  - Latency")
   2375          << " limited.\n";
   2376 }
   2377 #endif
   2378 
   2379 //===----------------------------------------------------------------------===//
   2380 // GenericScheduler - Generic implementation of MachineSchedStrategy.
   2381 //===----------------------------------------------------------------------===//
   2382 
   2383 void GenericSchedulerBase::SchedCandidate::
   2384 initResourceDelta(const ScheduleDAGMI *DAG,
   2385                   const TargetSchedModel *SchedModel) {
   2386   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
   2387     return;
   2388 
   2389   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   2390   for (TargetSchedModel::ProcResIter
   2391          PI = SchedModel->getWriteProcResBegin(SC),
   2392          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
   2393     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
   2394       ResDelta.CritResources += PI->Cycles;
   2395     if (PI->ProcResourceIdx == Policy.DemandResIdx)
   2396       ResDelta.DemandedResources += PI->Cycles;
   2397   }
   2398 }
   2399 
    2400 /// Set the CandPolicy for a scheduling zone given the current resources and
   2401 /// latencies inside and outside the zone.
   2402 void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
   2403                                      SchedBoundary &CurrZone,
   2404                                      SchedBoundary *OtherZone) {
   2405   // Apply preemptive heuristics based on the total latency and resources
   2406   // inside and outside this zone. Potential stalls should be considered before
   2407   // following this policy.
   2408 
   2409   // Compute remaining latency. We need this both to determine whether the
   2410   // overall schedule has become latency-limited and whether the instructions
   2411   // outside this zone are resource or latency limited.
   2412   //
   2413   // The "dependent" latency is updated incrementally during scheduling as the
   2414   // max height/depth of scheduled nodes minus the cycles since it was
   2415   // scheduled:
    2416   //   DLat = max (N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
   2417   //
   2418   // The "independent" latency is the max ready queue depth:
   2419   //   ILat = max N.depth for N in Available|Pending
   2420   //
   2421   // RemainingLatency is the greater of independent and dependent latency.
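           // For example (an illustrative sketch, values assumed): if the
           // dependent latency has decayed to 4 cycles but a node still in
           // Available/Pending has 7 cycles of unscheduled latency, RemLatency
           // is 7.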
   2422   unsigned RemLatency = CurrZone.getDependentLatency();
   2423   RemLatency = std::max(RemLatency,
   2424                         CurrZone.findMaxLatency(CurrZone.Available.elements()));
   2425   RemLatency = std::max(RemLatency,
   2426                         CurrZone.findMaxLatency(CurrZone.Pending.elements()));
   2427 
   2428   // Compute the critical resource outside the zone.
   2429   unsigned OtherCritIdx = 0;
   2430   unsigned OtherCount =
   2431     OtherZone ? OtherZone->getOtherResourceCount(OtherCritIdx) : 0;
   2432 
   2433   bool OtherResLimited = false;
   2434   if (SchedModel->hasInstrSchedModel())
   2435     OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
   2436                                          OtherCount, RemLatency);
   2437 
   2438   // Schedule aggressively for latency in PostRA mode. We don't check for
   2439   // acyclic latency during PostRA, and highly out-of-order processors will
   2440   // skip PostRA scheduling.
   2441   if (!OtherResLimited) {
   2442     if (IsPostRA || (RemLatency + CurrZone.getCurrCycle() > Rem.CriticalPath)) {
   2443       Policy.ReduceLatency |= true;
   2444       LLVM_DEBUG(dbgs() << "  " << CurrZone.Available.getName()
   2445                         << " RemainingLatency " << RemLatency << " + "
   2446                         << CurrZone.getCurrCycle() << "c > CritPath "
   2447                         << Rem.CriticalPath << "\n");
   2448     }
   2449   }
   2450   // If the same resource is limiting inside and outside the zone, do nothing.
   2451   if (CurrZone.getZoneCritResIdx() == OtherCritIdx)
   2452     return;
   2453 
   2454   LLVM_DEBUG(if (CurrZone.isResourceLimited()) {
   2455     dbgs() << "  " << CurrZone.Available.getName() << " ResourceLimited: "
   2456            << SchedModel->getResourceName(CurrZone.getZoneCritResIdx()) << "\n";
   2457   } if (OtherResLimited) dbgs()
   2458                  << "  RemainingLimit: "
   2459                  << SchedModel->getResourceName(OtherCritIdx) << "\n";
   2460              if (!CurrZone.isResourceLimited() && !OtherResLimited) dbgs()
   2461              << "  Latency limited both directions.\n");
   2462 
   2463   if (CurrZone.isResourceLimited() && !Policy.ReduceResIdx)
   2464     Policy.ReduceResIdx = CurrZone.getZoneCritResIdx();
   2465 
   2466   if (OtherResLimited)
   2467     Policy.DemandResIdx = OtherCritIdx;
   2468 }
   2469 
   2470 #ifndef NDEBUG
   2471 const char *GenericSchedulerBase::getReasonStr(
   2472   GenericSchedulerBase::CandReason Reason) {
   2473   switch (Reason) {
   2474   case NoCand:         return "NOCAND    ";
   2475   case Only1:          return "ONLY1     ";
   2476   case PhysRegCopy:    return "PREG-COPY ";
   2477   case RegExcess:      return "REG-EXCESS";
   2478   case RegCritical:    return "REG-CRIT  ";
   2479   case Stall:          return "STALL     ";
   2480   case Cluster:        return "CLUSTER   ";
   2481   case Weak:           return "WEAK      ";
   2482   case RegMax:         return "REG-MAX   ";
   2483   case ResourceReduce: return "RES-REDUCE";
   2484   case ResourceDemand: return "RES-DEMAND";
   2485   case TopDepthReduce: return "TOP-DEPTH ";
   2486   case TopPathReduce:  return "TOP-PATH  ";
   2487   case BotHeightReduce:return "BOT-HEIGHT";
   2488   case BotPathReduce:  return "BOT-PATH  ";
   2489   case NextDefUse:     return "DEF-USE   ";
   2490   case NodeOrder:      return "ORDER     ";
    2491   }
   2492   llvm_unreachable("Unknown reason!");
   2493 }
   2494 
   2495 void GenericSchedulerBase::traceCandidate(const SchedCandidate &Cand) {
   2496   PressureChange P;
   2497   unsigned ResIdx = 0;
   2498   unsigned Latency = 0;
   2499   switch (Cand.Reason) {
   2500   default:
   2501     break;
   2502   case RegExcess:
   2503     P = Cand.RPDelta.Excess;
   2504     break;
   2505   case RegCritical:
   2506     P = Cand.RPDelta.CriticalMax;
   2507     break;
   2508   case RegMax:
   2509     P = Cand.RPDelta.CurrentMax;
   2510     break;
   2511   case ResourceReduce:
   2512     ResIdx = Cand.Policy.ReduceResIdx;
   2513     break;
   2514   case ResourceDemand:
   2515     ResIdx = Cand.Policy.DemandResIdx;
   2516     break;
   2517   case TopDepthReduce:
   2518     Latency = Cand.SU->getDepth();
   2519     break;
   2520   case TopPathReduce:
   2521     Latency = Cand.SU->getHeight();
   2522     break;
   2523   case BotHeightReduce:
   2524     Latency = Cand.SU->getHeight();
   2525     break;
   2526   case BotPathReduce:
   2527     Latency = Cand.SU->getDepth();
   2528     break;
   2529   }
   2530   dbgs() << "  Cand SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
   2531   if (P.isValid())
   2532     dbgs() << " " << TRI->getRegPressureSetName(P.getPSet())
   2533            << ":" << P.getUnitInc() << " ";
   2534   else
   2535     dbgs() << "      ";
   2536   if (ResIdx)
   2537     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
   2538   else
   2539     dbgs() << "         ";
   2540   if (Latency)
   2541     dbgs() << " " << Latency << " cycles ";
   2542   else
   2543     dbgs() << "          ";
   2544   dbgs() << '\n';
   2545 }
   2546 #endif
   2547 
   2548 namespace llvm {
   2549 /// Return true if this heuristic determines order.
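        /// If TryCand wins, its Reason is set to \p Reason; if Cand wins, Cand.Reason
        /// is only updated when \p Reason has higher priority (a smaller enum value).
        /// A tie leaves both candidates untouched and returns false.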
   2550 bool tryLess(int TryVal, int CandVal,
   2551              GenericSchedulerBase::SchedCandidate &TryCand,
   2552              GenericSchedulerBase::SchedCandidate &Cand,
   2553              GenericSchedulerBase::CandReason Reason) {
   2554   if (TryVal < CandVal) {
   2555     TryCand.Reason = Reason;
   2556     return true;
   2557   }
   2558   if (TryVal > CandVal) {
   2559     if (Cand.Reason > Reason)
   2560       Cand.Reason = Reason;
   2561     return true;
   2562   }
   2563   return false;
   2564 }
   2565 
   2566 bool tryGreater(int TryVal, int CandVal,
   2567                 GenericSchedulerBase::SchedCandidate &TryCand,
   2568                 GenericSchedulerBase::SchedCandidate &Cand,
   2569                 GenericSchedulerBase::CandReason Reason) {
   2570   if (TryVal > CandVal) {
   2571     TryCand.Reason = Reason;
   2572     return true;
   2573   }
   2574   if (TryVal < CandVal) {
   2575     if (Cand.Reason > Reason)
   2576       Cand.Reason = Reason;
   2577     return true;
   2578   }
   2579   return false;
   2580 }
   2581 
   2582 bool tryLatency(GenericSchedulerBase::SchedCandidate &TryCand,
   2583                 GenericSchedulerBase::SchedCandidate &Cand,
   2584                 SchedBoundary &Zone) {
   2585   if (Zone.isTop()) {
   2586     if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
   2587       if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
   2588                   TryCand, Cand, GenericSchedulerBase::TopDepthReduce))
   2589         return true;
   2590     }
   2591     if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
   2592                    TryCand, Cand, GenericSchedulerBase::TopPathReduce))
   2593       return true;
   2594   } else {
   2595     if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
   2596       if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
   2597                   TryCand, Cand, GenericSchedulerBase::BotHeightReduce))
   2598         return true;
   2599     }
   2600     if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
   2601                    TryCand, Cand, GenericSchedulerBase::BotPathReduce))
   2602       return true;
   2603   }
   2604   return false;
   2605 }
   2606 } // end namespace llvm
   2607 
   2608 static void tracePick(GenericSchedulerBase::CandReason Reason, bool IsTop) {
   2609   LLVM_DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
   2610                     << GenericSchedulerBase::getReasonStr(Reason) << '\n');
   2611 }
   2612 
   2613 static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand) {
   2614   tracePick(Cand.Reason, Cand.AtTop);
   2615 }
   2616 
   2617 void GenericScheduler::initialize(ScheduleDAGMI *dag) {
   2618   assert(dag->hasVRegLiveness() &&
   2619          "(PreRA)GenericScheduler needs vreg liveness");
   2620   DAG = static_cast<ScheduleDAGMILive*>(dag);
   2621   SchedModel = DAG->getSchedModel();
   2622   TRI = DAG->TRI;
   2623 
   2624   Rem.init(DAG, SchedModel);
   2625   Top.init(DAG, SchedModel, &Rem);
   2626   Bot.init(DAG, SchedModel, &Rem);
   2627 
   2628   // Initialize resource counts.
   2629 
   2630   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
   2631   // are disabled, then these HazardRecs will be disabled.
   2632   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
   2633   if (!Top.HazardRec) {
   2634     Top.HazardRec =
   2635         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
   2636             Itin, DAG);
   2637   }
   2638   if (!Bot.HazardRec) {
   2639     Bot.HazardRec =
   2640         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
   2641             Itin, DAG);
   2642   }
   2643   TopCand.SU = nullptr;
   2644   BotCand.SU = nullptr;
   2645 }
   2646 
   2647 /// Initialize the per-region scheduling policy.
   2648 void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
   2649                                   MachineBasicBlock::iterator End,
   2650                                   unsigned NumRegionInstrs) {
   2651   const MachineFunction &MF = *Begin->getMF();
   2652   const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
   2653 
   2654   // Avoid setting up the register pressure tracker for small regions to save
   2655   // compile time. As a rough heuristic, only track pressure when the number of
   2656   // schedulable instructions exceeds half the integer register file.
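          // For example, on a target whose widest legal integer type has 16
          // allocatable registers (hypothetical numbers), pressure is tracked only
          // for regions of more than 8 schedulable instructions.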
   2657   RegionPolicy.ShouldTrackPressure = true;
   2658   for (unsigned VT = MVT::i32; VT > (unsigned)MVT::i1; --VT) {
   2659     MVT::SimpleValueType LegalIntVT = (MVT::SimpleValueType)VT;
   2660     if (TLI->isTypeLegal(LegalIntVT)) {
   2661       unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs(
   2662         TLI->getRegClassFor(LegalIntVT));
   2663       RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2);
   2664     }
   2665   }
   2666 
   2667   // For generic targets, we default to bottom-up, because it's simpler and more
   2668   // compile-time optimizations have been implemented in that direction.
   2669   RegionPolicy.OnlyBottomUp = true;
   2670 
   2671   // Allow the subtarget to override default policy.
   2672   MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
   2673 
   2674   // After subtarget overrides, apply command line options.
   2675   if (!EnableRegPressure)
   2676     RegionPolicy.ShouldTrackPressure = false;
   2677 
   2678   // Check -misched-topdown/bottomup can force or unforce scheduling direction.
   2679   // e.g. -misched-bottomup=false allows scheduling in both directions.
   2680   assert((!ForceTopDown || !ForceBottomUp) &&
   2681          "-misched-topdown incompatible with -misched-bottomup");
   2682   if (ForceBottomUp.getNumOccurrences() > 0) {
   2683     RegionPolicy.OnlyBottomUp = ForceBottomUp;
   2684     if (RegionPolicy.OnlyBottomUp)
   2685       RegionPolicy.OnlyTopDown = false;
   2686   }
   2687   if (ForceTopDown.getNumOccurrences() > 0) {
   2688     RegionPolicy.OnlyTopDown = ForceTopDown;
   2689     if (RegionPolicy.OnlyTopDown)
   2690       RegionPolicy.OnlyBottomUp = false;
   2691   }
   2692 }
   2693 
   2694 void GenericScheduler::dumpPolicy() const {
   2695   // Cannot completely remove virtual function even in release mode.
   2696 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   2697   dbgs() << "GenericScheduler RegionPolicy: "
   2698          << " ShouldTrackPressure=" << RegionPolicy.ShouldTrackPressure
   2699          << " OnlyTopDown=" << RegionPolicy.OnlyTopDown
   2700          << " OnlyBottomUp=" << RegionPolicy.OnlyBottomUp
   2701          << "\n";
   2702 #endif
   2703 }
   2704 
   2705 /// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic
   2706 /// critical path by more cycles than it takes to drain the instruction buffer.
   2707 /// We estimate an upper bound on in-flight instructions as:
   2708 ///
   2709 /// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height )
   2710 /// InFlightIterations = AcyclicPath / CyclesPerIteration
   2711 /// InFlightResources = InFlightIterations * LoopResources
   2712 ///
   2713 /// TODO: Check execution resources in addition to IssueCount.
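        ///
        /// Worked example with hypothetical numbers: CyclicPath = 8c and
        /// Loop-Resource-Height = 10c give CyclesPerIteration = 10c; with
        /// AcyclicPath = 40c, InFlightIterations = 4. At 20 micro-ops per
        /// iteration, InFlightResources = 80 micro-ops, so the region is marked
        /// acyclic-latency limited whenever the micro-op buffer holds fewer than 80.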
   2714 void GenericScheduler::checkAcyclicLatency() {
   2715   if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath)
   2716     return;
   2717 
   2718   // Scaled number of cycles per loop iteration.
   2719   unsigned IterCount =
   2720     std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(),
   2721              Rem.RemIssueCount);
   2722   // Scaled acyclic critical path.
   2723   unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor();
   2724   // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop
   2725   unsigned InFlightCount =
   2726     (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount;
   2727   unsigned BufferLimit =
   2728     SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor();
   2729 
   2730   Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit;
   2731 
   2732   LLVM_DEBUG(
   2733       dbgs() << "IssueCycles="
   2734              << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c "
   2735              << "IterCycles=" << IterCount / SchedModel->getLatencyFactor()
   2736              << "c NumIters=" << (AcyclicCount + IterCount - 1) / IterCount
   2737              << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor()
   2738              << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n";
   2739       if (Rem.IsAcyclicLatencyLimited) dbgs() << "  ACYCLIC LATENCY LIMIT\n");
   2740 }
   2741 
   2742 void GenericScheduler::registerRoots() {
   2743   Rem.CriticalPath = DAG->ExitSU.getDepth();
   2744 
   2745   // Some roots may not feed into ExitSU. Check all of them just in case.
   2746   for (const SUnit *SU : Bot.Available) {
   2747     if (SU->getDepth() > Rem.CriticalPath)
   2748       Rem.CriticalPath = SU->getDepth();
   2749   }
   2750   LLVM_DEBUG(dbgs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << '\n');
   2751   if (DumpCriticalPathLength) {
   2752     errs() << "Critical Path(GS-RR ): " << Rem.CriticalPath << " \n";
   2753   }
   2754 
   2755   if (EnableCyclicPath && SchedModel->getMicroOpBufferSize() > 0) {
   2756     Rem.CyclicCritPath = DAG->computeCyclicCriticalPath();
   2757     checkAcyclicLatency();
   2758   }
   2759 }
   2760 
   2761 namespace llvm {
   2762 bool tryPressure(const PressureChange &TryP,
   2763                  const PressureChange &CandP,
   2764                  GenericSchedulerBase::SchedCandidate &TryCand,
   2765                  GenericSchedulerBase::SchedCandidate &Cand,
   2766                  GenericSchedulerBase::CandReason Reason,
   2767                  const TargetRegisterInfo *TRI,
   2768                  const MachineFunction &MF) {
   2769   // If one candidate decreases and the other increases, go with it.
   2770   // Invalid candidates have UnitInc==0.
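          // The boolean comparison below prefers whichever candidate decreases
          // pressure; if both decrease or neither does, it is a tie and we fall
          // through to the finer-grained checks.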
   2771   if (tryGreater(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
   2772                  Reason)) {
   2773     return true;
   2774   }
   2775   // Do not compare the magnitude of pressure changes between top and bottom
   2776   // boundary.
   2777   if (Cand.AtTop != TryCand.AtTop)
   2778     return false;
   2779 
   2780   // If both candidates affect the same set in the same boundary, go with the
   2781   // smallest increase.
   2782   unsigned TryPSet = TryP.getPSetOrMax();
   2783   unsigned CandPSet = CandP.getPSetOrMax();
   2784   if (TryPSet == CandPSet) {
   2785     return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand,
   2786                    Reason);
   2787   }
   2788 
   2789   int TryRank = TryP.isValid() ? TRI->getRegPressureSetScore(MF, TryPSet) :
   2790                                  std::numeric_limits<int>::max();
   2791 
   2792   int CandRank = CandP.isValid() ? TRI->getRegPressureSetScore(MF, CandPSet) :
   2793                                    std::numeric_limits<int>::max();
   2794 
   2795   // If the candidates are decreasing pressure, reverse priority.
   2796   if (TryP.getUnitInc() < 0)
   2797     std::swap(TryRank, CandRank);
   2798   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
   2799 }
   2800 
   2801 unsigned getWeakLeft(const SUnit *SU, bool isTop) {
   2802   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
   2803 }
   2804 
   2805 /// Minimize physical register live ranges. Regalloc wants them adjacent to
   2806 /// their physreg def/use.
   2807 ///
   2808 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
   2809 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
   2810 /// with the operation that produces or consumes the physreg. We'll do this when
   2811 /// regalloc has support for parallel copies.
   2812 int biasPhysRegCopy(const SUnit *SU, bool isTop) {
   2813   const MachineInstr *MI = SU->getInstr();
   2814   if (!MI->isCopy())
   2815     return 0;
   2816 
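          // For a COPY, operand 0 is the definition and operand 1 is the source.
          // Scheduling top-down, the source's producer lies in the already scheduled
          // zone; bottom-up, the definition's consumers do.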
   2817   unsigned ScheduledOper = isTop ? 1 : 0;
   2818   unsigned UnscheduledOper = isTop ? 0 : 1;
   2819   // If we have already scheduled the physreg producer/consumer, immediately
   2820   // schedule the copy.
   2821   if (TargetRegisterInfo::isPhysicalRegister(
   2822         MI->getOperand(ScheduledOper).getReg()))
   2823     return 1;
   2824   // If the physreg is at the boundary, defer it. Otherwise schedule it
   2825   // immediately to free the dependent. We can hoist the copy later.
   2826   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
   2827   if (TargetRegisterInfo::isPhysicalRegister(
   2828         MI->getOperand(UnscheduledOper).getReg()))
   2829     return AtBoundary ? -1 : 1;
   2830   return 0;
   2831 }
   2832 } // end namespace llvm
   2833 
   2834 void GenericScheduler::initCandidate(SchedCandidate &Cand, SUnit *SU,
   2835                                      bool AtTop,
   2836                                      const RegPressureTracker &RPTracker,
   2837                                      RegPressureTracker &TempTracker) {
   2838   Cand.SU = SU;
   2839   Cand.AtTop = AtTop;
   2840   if (DAG->isTrackingPressure()) {
   2841     if (AtTop) {
   2842       TempTracker.getMaxDownwardPressureDelta(
   2843         Cand.SU->getInstr(),
   2844         Cand.RPDelta,
   2845         DAG->getRegionCriticalPSets(),
   2846         DAG->getRegPressure().MaxSetPressure);
   2847     } else {
   2848       if (VerifyScheduling) {
   2849         TempTracker.getMaxUpwardPressureDelta(
   2850           Cand.SU->getInstr(),
   2851           &DAG->getPressureDiff(Cand.SU),
   2852           Cand.RPDelta,
   2853           DAG->getRegionCriticalPSets(),
   2854           DAG->getRegPressure().MaxSetPressure);
   2855       } else {
   2856         RPTracker.getUpwardPressureDelta(
   2857           Cand.SU->getInstr(),
   2858           DAG->getPressureDiff(Cand.SU),
   2859           Cand.RPDelta,
   2860           DAG->getRegionCriticalPSets(),
   2861           DAG->getRegPressure().MaxSetPressure);
   2862       }
   2863     }
   2864   }
   2865   LLVM_DEBUG(if (Cand.RPDelta.Excess.isValid()) dbgs()
   2866              << "  Try  SU(" << Cand.SU->NodeNum << ") "
   2867              << TRI->getRegPressureSetName(Cand.RPDelta.Excess.getPSet()) << ":"
   2868              << Cand.RPDelta.Excess.getUnitInc() << "\n");
   2869 }
   2870 
   2871 /// Apply a set of heuristics to a new candidate. Heuristics are currently
   2872 /// hierarchical. This may be more efficient than a graduated cost model because
   2873 /// we don't need to evaluate all aspects of the model for each node in the
   2874 /// queue. But it's really done to make the heuristics easier to debug and
   2875 /// statistically analyze.
   2876 ///
   2877 /// \param Cand provides the policy and current best candidate.
   2878 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
   2879 /// \param Zone describes the scheduled zone that we are extending, or nullptr
   2880 ///             if Cand is from a different zone than TryCand.
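        ///
        /// Roughly in decreasing priority, the checks below are: physreg copy bias,
        /// excess pressure, critical-set pressure, acyclic latency, stalls,
        /// clustering, weak edges, max pressure, resource reduction/demand, latency,
        /// and finally the original instruction order; several of these apply only
        /// when both candidates come from the same boundary.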
   2881 void GenericScheduler::tryCandidate(SchedCandidate &Cand,
   2882                                     SchedCandidate &TryCand,
   2883                                     SchedBoundary *Zone) const {
   2884   // Initialize the candidate if needed.
   2885   if (!Cand.isValid()) {
   2886     TryCand.Reason = NodeOrder;
   2887     return;
   2888   }
   2889 
   2890   if (tryGreater(biasPhysRegCopy(TryCand.SU, TryCand.AtTop),
   2891                  biasPhysRegCopy(Cand.SU, Cand.AtTop),
   2892                  TryCand, Cand, PhysRegCopy))
   2893     return;
   2894 
   2895   // Avoid exceeding the target's limit.
   2896   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess,
   2897                                                Cand.RPDelta.Excess,
   2898                                                TryCand, Cand, RegExcess, TRI,
   2899                                                DAG->MF))
   2900     return;
   2901 
   2902   // Avoid increasing the max critical pressure in the scheduled region.
   2903   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax,
   2904                                                Cand.RPDelta.CriticalMax,
   2905                                                TryCand, Cand, RegCritical, TRI,
   2906                                                DAG->MF))
   2907     return;
   2908 
   2909   // We only compare a subset of features when comparing nodes between
   2910   // Top and Bottom boundary. Some properties are simply incomparable, in many
   2911   // other instances we should only override the other boundary if something
   2912   // is a clear good pick on one boundary. Skip heuristics that are more
   2913   // "tie-breaking" in nature.
   2914   bool SameBoundary = Zone != nullptr;
   2915   if (SameBoundary) {
   2916     // For loops that are acyclic path limited, aggressively schedule for
   2917     // latency. Within an single cycle, whenever CurrMOps > 0, allow normal
   2918     // heuristics to take precedence.
   2919     if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
   2920         tryLatency(TryCand, Cand, *Zone))
   2921       return;
   2922 
   2923     // Prioritize instructions that read unbuffered resources by stall cycles.
   2924     if (tryLess(Zone->getLatencyStallCycles(TryCand.SU),
   2925                 Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
   2926       return;
   2927   }
   2928 
   2929   // Keep clustered nodes together to encourage downstream peephole
   2930   // optimizations which may reduce resource requirements.
   2931   //
   2932   // This is a best effort to set things up for a post-RA pass. Optimizations
   2933   // like generating loads of multiple registers should ideally be done within
   2934   // the scheduler pass by combining the loads during DAG postprocessing.
   2935   const SUnit *CandNextClusterSU =
   2936     Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
   2937   const SUnit *TryCandNextClusterSU =
   2938     TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
   2939   if (tryGreater(TryCand.SU == TryCandNextClusterSU,
   2940                  Cand.SU == CandNextClusterSU,
   2941                  TryCand, Cand, Cluster))
   2942     return;
   2943 
   2944   if (SameBoundary) {
   2945     // Weak edges are for clustering and other constraints.
   2946     if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop),
   2947                 getWeakLeft(Cand.SU, Cand.AtTop),
   2948                 TryCand, Cand, Weak))
   2949       return;
   2950   }
   2951 
   2952   // Avoid increasing the max pressure of the entire region.
   2953   if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax,
   2954                                                Cand.RPDelta.CurrentMax,
   2955                                                TryCand, Cand, RegMax, TRI,
   2956                                                DAG->MF))
   2957     return;
   2958 
   2959   if (SameBoundary) {
   2960     // Avoid critical resource consumption and balance the schedule.
   2961     TryCand.initResourceDelta(DAG, SchedModel);
   2962     if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
   2963                 TryCand, Cand, ResourceReduce))
   2964       return;
   2965     if (tryGreater(TryCand.ResDelta.DemandedResources,
   2966                    Cand.ResDelta.DemandedResources,
   2967                    TryCand, Cand, ResourceDemand))
   2968       return;
   2969 
   2970     // Avoid serializing long latency dependence chains.
   2971     // For acyclic path limited loops, latency was already checked above.
   2972     if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency &&
   2973         !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
   2974       return;
   2975 
   2976     // Fall through to original instruction order.
   2977     if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
   2978         || (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
   2979       TryCand.Reason = NodeOrder;
   2980     }
   2981   }
   2982 }
   2983 
   2984 /// Pick the best candidate from the queue.
   2985 ///
   2986 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
   2987 /// DAG building. To adjust for the current scheduling location we need to
   2988 /// maintain the number of vreg uses remaining to be top-scheduled.
   2989 void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
   2990                                          const CandPolicy &ZonePolicy,
   2991                                          const RegPressureTracker &RPTracker,
   2992                                          SchedCandidate &Cand) {
   2993   // getMaxPressureDelta temporarily modifies the tracker.
   2994   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
   2995 
   2996   ReadyQueue &Q = Zone.Available;
   2997   for (SUnit *SU : Q) {
   2998 
   2999     SchedCandidate TryCand(ZonePolicy);
   3000     initCandidate(TryCand, SU, Zone.isTop(), RPTracker, TempTracker);
   3001     // Pass SchedBoundary only when comparing nodes from the same boundary.
   3002     SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
   3003     tryCandidate(Cand, TryCand, ZoneArg);
   3004     if (TryCand.Reason != NoCand) {
   3005       // Initialize resource delta if needed in case future heuristics query it.
   3006       if (TryCand.ResDelta == SchedResourceDelta())
   3007         TryCand.initResourceDelta(DAG, SchedModel);
   3008       Cand.setBest(TryCand);
   3009       LLVM_DEBUG(traceCandidate(Cand));
   3010     }
   3011   }
   3012 }
   3013 
   3014 /// Pick the best candidate node from either the top or bottom queue.
   3015 SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) {
   3016   // Schedule as far as possible in the direction of no choice. This is most
   3017   // efficient, but also provides the best heuristics for CriticalPSets.
   3018   if (SUnit *SU = Bot.pickOnlyChoice()) {
   3019     IsTopNode = false;
   3020     tracePick(Only1, false);
   3021     return SU;
   3022   }
   3023   if (SUnit *SU = Top.pickOnlyChoice()) {
   3024     IsTopNode = true;
   3025     tracePick(Only1, true);
   3026     return SU;
   3027   }
   3028   // Set the bottom-up policy based on the state of the current bottom zone and
   3029   // the instructions outside the zone, including the top zone.
   3030   CandPolicy BotPolicy;
   3031   setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
   3032   // Set the top-down policy based on the state of the current top zone and
   3033   // the instructions outside the zone, including the bottom zone.
   3034   CandPolicy TopPolicy;
   3035   setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
   3036 
   3037   // See if BotCand is still valid (because we previously scheduled from Top).
   3038   LLVM_DEBUG(dbgs() << "Picking from Bot:\n");
   3039   if (!BotCand.isValid() || BotCand.SU->isScheduled ||
   3040       BotCand.Policy != BotPolicy) {
   3041     BotCand.reset(CandPolicy());
   3042     pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
   3043     assert(BotCand.Reason != NoCand && "failed to find the first candidate");
   3044   } else {
   3045     LLVM_DEBUG(traceCandidate(BotCand));
   3046 #ifndef NDEBUG
   3047     if (VerifyScheduling) {
   3048       SchedCandidate TCand;
   3049       TCand.reset(CandPolicy());
   3050       pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), TCand);
   3051       assert(TCand.SU == BotCand.SU &&
   3052              "Last pick result should correspond to re-picking right now");
   3053     }
   3054 #endif
   3055   }
   3056 
   3057   // Check if the top Q has a better candidate.
   3058   LLVM_DEBUG(dbgs() << "Picking from Top:\n");
   3059   if (!TopCand.isValid() || TopCand.SU->isScheduled ||
   3060       TopCand.Policy != TopPolicy) {
   3061     TopCand.reset(CandPolicy());
   3062     pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
   3063     assert(TopCand.Reason != NoCand && "failed to find the first candidate");
   3064   } else {
   3065     LLVM_DEBUG(traceCandidate(TopCand));
   3066 #ifndef NDEBUG
   3067     if (VerifyScheduling) {
   3068       SchedCandidate TCand;
   3069       TCand.reset(CandPolicy());
   3070       pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TCand);
   3071       assert(TCand.SU == TopCand.SU &&
   3072            "Last pick result should correspond to re-picking right now");
   3073     }
   3074 #endif
   3075   }
   3076 
   3077   // Pick best from BotCand and TopCand.
   3078   assert(BotCand.isValid());
   3079   assert(TopCand.isValid());
   3080   SchedCandidate Cand = BotCand;
   3081   TopCand.Reason = NoCand;
   3082   tryCandidate(Cand, TopCand, nullptr);
   3083   if (TopCand.Reason != NoCand) {
   3084     Cand.setBest(TopCand);
   3085     LLVM_DEBUG(traceCandidate(Cand));
   3086   }
   3087 
   3088   IsTopNode = Cand.AtTop;
   3089   tracePick(Cand);
   3090   return Cand.SU;
   3091 }
   3092 
   3093 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
   3094 SUnit *GenericScheduler::pickNode(bool &IsTopNode) {
   3095   if (DAG->top() == DAG->bottom()) {
   3096     assert(Top.Available.empty() && Top.Pending.empty() &&
   3097            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
   3098     return nullptr;
   3099   }
   3100   SUnit *SU;
   3101   do {
   3102     if (RegionPolicy.OnlyTopDown) {
   3103       SU = Top.pickOnlyChoice();
   3104       if (!SU) {
   3105         CandPolicy NoPolicy;
   3106         TopCand.reset(NoPolicy);
   3107         pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
   3108         assert(TopCand.Reason != NoCand && "failed to find a candidate");
   3109         tracePick(TopCand);
   3110         SU = TopCand.SU;
   3111       }
   3112       IsTopNode = true;
   3113     } else if (RegionPolicy.OnlyBottomUp) {
   3114       SU = Bot.pickOnlyChoice();
   3115       if (!SU) {
   3116         CandPolicy NoPolicy;
   3117         BotCand.reset(NoPolicy);
   3118         pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
   3119         assert(BotCand.Reason != NoCand && "failed to find a candidate");
   3120         tracePick(BotCand);
   3121         SU = BotCand.SU;
   3122       }
   3123       IsTopNode = false;
   3124     } else {
   3125       SU = pickNodeBidirectional(IsTopNode);
   3126     }
   3127   } while (SU->isScheduled);
   3128 
   3129   if (SU->isTopReady())
   3130     Top.removeReady(SU);
   3131   if (SU->isBottomReady())
   3132     Bot.removeReady(SU);
   3133 
   3134   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
   3135                     << *SU->getInstr());
   3136   return SU;
   3137 }
   3138 
   3139 void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
   3140   MachineBasicBlock::iterator InsertPos = SU->getInstr();
   3141   if (!isTop)
   3142     ++InsertPos;
   3143   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
   3144 
   3145   // Find already scheduled copies with a single physreg dependence and move
   3146   // them just above the scheduled instruction.
   3147   for (SDep &Dep : Deps) {
   3148     if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
   3149       continue;
   3150     SUnit *DepSU = Dep.getSUnit();
   3151     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
   3152       continue;
   3153     MachineInstr *Copy = DepSU->getInstr();
   3154     if (!Copy->isCopy())
   3155       continue;
   3156     LLVM_DEBUG(dbgs() << "  Rescheduling physreg copy ";
   3157                Dep.getSUnit()->dump(DAG));
   3158     DAG->moveInstruction(Copy, InsertPos);
   3159   }
   3160 }
   3161 
   3162 /// Update the scheduler's state after scheduling a node. This is the same node
   3163 /// that was just returned by pickNode(). However, ScheduleDAGMILive needs to
   3164 /// update it's state based on the current cycle before MachineSchedStrategy
   3165 /// does.
   3166 ///
   3167 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
   3168 /// them here. See comments in biasPhysRegCopy.
   3169 void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   3170   if (IsTopNode) {
   3171     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
   3172     Top.bumpNode(SU);
   3173     if (SU->hasPhysRegUses)
   3174       reschedulePhysRegCopies(SU, true);
   3175   } else {
   3176     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
   3177     Bot.bumpNode(SU);
   3178     if (SU->hasPhysRegDefs)
   3179       reschedulePhysRegCopies(SU, false);
   3180   }
   3181 }
   3182 
   3183 /// Create the standard converging machine scheduler. This will be used as the
   3184 /// default scheduler if the target does not set a default.
   3185 ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
   3186   ScheduleDAGMILive *DAG =
   3187       new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
   3188   // Register DAG post-processors.
   3189   //
   3190   // FIXME: extend the mutation API to allow earlier mutations to instantiate
   3191   // data and pass it to later mutations. Have a single mutation that gathers
   3192   // the interesting nodes in one pass.
   3193   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
   3194   return DAG;
   3195 }
   3196 
   3197 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   3198   return createGenericSchedLive(C);
   3199 }
   3200 
   3201 static MachineSchedRegistry
   3202 GenericSchedRegistry("converge", "Standard converging scheduler.",
   3203                      createConvergingSched);
   3204 
   3205 //===----------------------------------------------------------------------===//
   3206 // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
   3207 //===----------------------------------------------------------------------===//
   3208 
   3209 void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
   3210   DAG = Dag;
   3211   SchedModel = DAG->getSchedModel();
   3212   TRI = DAG->TRI;
   3213 
   3214   Rem.init(DAG, SchedModel);
   3215   Top.init(DAG, SchedModel, &Rem);
   3216   BotRoots.clear();
   3217 
   3218   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
   3219   // or are disabled, then these HazardRecs will be disabled.
   3220   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
   3221   if (!Top.HazardRec) {
   3222     Top.HazardRec =
   3223         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
   3224             Itin, DAG);
   3225   }
   3226 }
   3227 
   3228 void PostGenericScheduler::registerRoots() {
   3229   Rem.CriticalPath = DAG->ExitSU.getDepth();
   3230 
   3231   // Some roots may not feed into ExitSU. Check all of them just in case.
   3232   for (const SUnit *SU : BotRoots) {
   3233     if (SU->getDepth() > Rem.CriticalPath)
   3234       Rem.CriticalPath = SU->getDepth();
   3235   }
   3236   LLVM_DEBUG(dbgs() << "Critical Path: (PGS-RR) " << Rem.CriticalPath << '\n');
   3237   if (DumpCriticalPathLength) {
   3238     errs() << "Critical Path(PGS-RR ): " << Rem.CriticalPath << " \n";
   3239   }
   3240 }
   3241 
   3242 /// Apply a set of heuristics to a new candidate for PostRA scheduling.
   3243 ///
   3244 /// \param Cand provides the policy and current best candidate.
   3245 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
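        ///
        /// Heuristics are applied in priority order: stall cycles on unbuffered
        /// resources, clustering, critical-resource reduction, demanded-resource
        /// increase, latency, and finally the original instruction order.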
   3246 void PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
   3247                                         SchedCandidate &TryCand) {
   3248   // Initialize the candidate if needed.
   3249   if (!Cand.isValid()) {
   3250     TryCand.Reason = NodeOrder;
   3251     return;
   3252   }
   3253 
   3254   // Prioritize instructions that read unbuffered resources by stall cycles.
   3255   if (tryLess(Top.getLatencyStallCycles(TryCand.SU),
   3256               Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall))
   3257     return;
   3258 
   3259   // Keep clustered nodes together.
   3260   if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(),
   3261                  Cand.SU == DAG->getNextClusterSucc(),
   3262                  TryCand, Cand, Cluster))
   3263     return;
   3264 
   3265   // Avoid critical resource consumption and balance the schedule.
   3266   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
   3267               TryCand, Cand, ResourceReduce))
   3268     return;
   3269   if (tryGreater(TryCand.ResDelta.DemandedResources,
   3270                  Cand.ResDelta.DemandedResources,
   3271                  TryCand, Cand, ResourceDemand))
   3272     return;
   3273 
   3274   // Avoid serializing long latency dependence chains.
   3275   if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) {
   3276     return;
   3277   }
   3278 
   3279   // Fall through to original instruction order.
   3280   if (TryCand.SU->NodeNum < Cand.SU->NodeNum)
   3281     TryCand.Reason = NodeOrder;
   3282 }
   3283 
   3284 void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
   3285   ReadyQueue &Q = Top.Available;
   3286   for (SUnit *SU : Q) {
   3287     SchedCandidate TryCand(Cand.Policy);
   3288     TryCand.SU = SU;
   3289     TryCand.AtTop = true;
   3290     TryCand.initResourceDelta(DAG, SchedModel);
   3291     tryCandidate(Cand, TryCand);
   3292     if (TryCand.Reason != NoCand) {
   3293       Cand.setBest(TryCand);
   3294       LLVM_DEBUG(traceCandidate(Cand));
   3295     }
   3296   }
   3297 }
   3298 
   3299 /// Pick the next node to schedule.
   3300 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
   3301   if (DAG->top() == DAG->bottom()) {
   3302     assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
   3303     return nullptr;
   3304   }
   3305   SUnit *SU;
   3306   do {
   3307     SU = Top.pickOnlyChoice();
   3308     if (SU) {
   3309       tracePick(Only1, true);
   3310     } else {
   3311       CandPolicy NoPolicy;
   3312       SchedCandidate TopCand(NoPolicy);
   3313       // Set the top-down policy based on the state of the current top zone and
   3314       // the instructions outside the zone, including the bottom zone.
   3315       setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
   3316       pickNodeFromQueue(TopCand);
   3317       assert(TopCand.Reason != NoCand && "failed to find a candidate");
   3318       tracePick(TopCand);
   3319       SU = TopCand.SU;
   3320     }
   3321   } while (SU->isScheduled);
   3322 
   3323   IsTopNode = true;
   3324   Top.removeReady(SU);
   3325 
   3326   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
   3327                     << *SU->getInstr());
   3328   return SU;
   3329 }
   3330 
   3331 /// Called after ScheduleDAGMI has scheduled an instruction and updated
   3332 /// scheduled/remaining flags in the DAG nodes.
   3333 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   3334   SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
   3335   Top.bumpNode(SU);
   3336 }
   3337 
   3338 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
   3339   return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
   3340                            /*RemoveKillFlags=*/true);
   3341 }
   3342 
   3343 //===----------------------------------------------------------------------===//
   3344 // ILP Scheduler. Currently for experimental analysis of heuristics.
   3345 //===----------------------------------------------------------------------===//
   3346 
   3347 namespace {
   3348 
   3349 /// Order nodes by the ILP metric.
   3350 struct ILPOrder {
   3351   const SchedDFSResult *DFSResult = nullptr;
   3352   const BitVector *ScheduledTrees = nullptr;
   3353   bool MaximizeILP;
   3354 
   3355   ILPOrder(bool MaxILP) : MaximizeILP(MaxILP) {}
   3356 
   3357   /// Apply a less-than relation on node priority.
   3358   ///
   3359   /// (Return true if A comes after B in the Q.)
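          ///
          /// Priority order: nodes in already scheduled subtrees first, then nodes in
          /// subtrees with a deeper connection level, then by ILP (maximum or minimum
          /// depending on MaximizeILP).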
   3360   bool operator()(const SUnit *A, const SUnit *B) const {
   3361     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
   3362     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
   3363     if (SchedTreeA != SchedTreeB) {
   3364       // Unscheduled trees have lower priority.
   3365       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
   3366         return ScheduledTrees->test(SchedTreeB);
   3367 
   3368       // Trees with shallower connections have lower priority.
   3369       if (DFSResult->getSubtreeLevel(SchedTreeA)
   3370           != DFSResult->getSubtreeLevel(SchedTreeB)) {
   3371         return DFSResult->getSubtreeLevel(SchedTreeA)
   3372           < DFSResult->getSubtreeLevel(SchedTreeB);
   3373       }
   3374     }
   3375     if (MaximizeILP)
   3376       return DFSResult->getILP(A) < DFSResult->getILP(B);
   3377     else
   3378       return DFSResult->getILP(A) > DFSResult->getILP(B);
   3379   }
   3380 };
   3381 
   3382 /// Schedule based on the ILP metric.
   3383 class ILPScheduler : public MachineSchedStrategy {
   3384   ScheduleDAGMILive *DAG = nullptr;
   3385   ILPOrder Cmp;
   3386 
   3387   std::vector<SUnit*> ReadyQ;
   3388 
   3389 public:
   3390   ILPScheduler(bool MaximizeILP) : Cmp(MaximizeILP) {}
   3391 
   3392   void initialize(ScheduleDAGMI *dag) override {
   3393     assert(dag->hasVRegLiveness() && "ILPScheduler needs vreg liveness");
   3394     DAG = static_cast<ScheduleDAGMILive*>(dag);
   3395     DAG->computeDFSResult();
   3396     Cmp.DFSResult = DAG->getDFSResult();
   3397     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
   3398     ReadyQ.clear();
   3399   }
   3400 
   3401   void registerRoots() override {
   3402     // Restore the heap in ReadyQ with the updated DFS results.
   3403     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   3404   }
   3405 
   3406   /// Implement MachineSchedStrategy interface.
   3407   /// -----------------------------------------
   3408 
   3409   /// Callback to select the highest priority node from the ready Q.
   3410   SUnit *pickNode(bool &IsTopNode) override {
   3411     if (ReadyQ.empty()) return nullptr;
   3412     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   3413     SUnit *SU = ReadyQ.back();
   3414     ReadyQ.pop_back();
   3415     IsTopNode = false;
   3416     LLVM_DEBUG(dbgs() << "Pick node "
   3417                       << "SU(" << SU->NodeNum << ") "
   3418                       << " ILP: " << DAG->getDFSResult()->getILP(SU)
   3419                       << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU)
   3420                       << " @"
   3421                       << DAG->getDFSResult()->getSubtreeLevel(
   3422                              DAG->getDFSResult()->getSubtreeID(SU))
   3423                       << '\n'
   3424                       << "Scheduling " << *SU->getInstr());
   3425     return SU;
   3426   }
   3427 
   3428   /// Scheduler callback to notify that a new subtree is scheduled.
   3429   void scheduleTree(unsigned SubtreeID) override {
   3430     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   3431   }
   3432 
   3433   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
   3434   /// DFSResults, and resort the priority Q.
   3435   void schedNode(SUnit *SU, bool IsTopNode) override {
   3436     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
   3437   }
   3438 
   3439   void releaseTopNode(SUnit *) override { /*only called for top roots*/ }
   3440 
   3441   void releaseBottomNode(SUnit *SU) override {
   3442     ReadyQ.push_back(SU);
   3443     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   3444   }
   3445 };
   3446 
   3447 } // end anonymous namespace
   3448 
   3449 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
   3450   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
   3451 }
   3452 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
   3453   return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
   3454 }
   3455 
   3456 static MachineSchedRegistry ILPMaxRegistry(
   3457   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
   3458 static MachineSchedRegistry ILPMinRegistry(
   3459   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
   3460 
   3461 //===----------------------------------------------------------------------===//
   3462 // Machine Instruction Shuffler for Correctness Testing
   3463 //===----------------------------------------------------------------------===//
   3464 
   3465 #ifndef NDEBUG
   3466 namespace {
   3467 
   3468 /// Apply a less-than relation on the node order, which corresponds to the
   3469 /// instruction order prior to scheduling. IsReverse implements greater-than.
   3470 template<bool IsReverse>
   3471 struct SUnitOrder {
   3472   bool operator()(SUnit *A, SUnit *B) const {
   3473     if (IsReverse)
   3474       return A->NodeNum > B->NodeNum;
   3475     else
   3476       return A->NodeNum < B->NodeNum;
   3477   }
   3478 };
   3479 
   3480 /// Reorder instructions as much as possible.
   3481 class InstructionShuffler : public MachineSchedStrategy {
   3482   bool IsAlternating;
   3483   bool IsTopDown;
   3484 
   3485   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
   3486   // gives nodes with a higher number higher priority, causing the latest
   3487   // instructions to be scheduled first.
   3488   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false>>
   3489     TopQ;
   3490 
   3491   // When scheduling bottom-up, use greater-than as the queue priority.
   3492   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true>>
   3493     BottomQ;
   3494 
   3495 public:
   3496   InstructionShuffler(bool alternate, bool topdown)
   3497     : IsAlternating(alternate), IsTopDown(topdown) {}
   3498 
   3499   void initialize(ScheduleDAGMI*) override {
   3500     TopQ.clear();
   3501     BottomQ.clear();
   3502   }
   3503 
   3504   /// Implement MachineSchedStrategy interface.
   3505   /// -----------------------------------------
   3506 
   3507   SUnit *pickNode(bool &IsTopNode) override {
   3508     SUnit *SU;
   3509     if (IsTopDown) {
   3510       do {
   3511         if (TopQ.empty()) return nullptr;
   3512         SU = TopQ.top();
   3513         TopQ.pop();
   3514       } while (SU->isScheduled);
   3515       IsTopNode = true;
   3516     } else {
   3517       do {
   3518         if (BottomQ.empty()) return nullptr;
   3519         SU = BottomQ.top();
   3520         BottomQ.pop();
   3521       } while (SU->isScheduled);
   3522       IsTopNode = false;
   3523     }
   3524     if (IsAlternating)
   3525       IsTopDown = !IsTopDown;
   3526     return SU;
   3527   }
   3528 
   3529   void schedNode(SUnit *SU, bool IsTopNode) override {}
   3530 
   3531   void releaseTopNode(SUnit *SU) override {
   3532     TopQ.push(SU);
   3533   }
   3534   void releaseBottomNode(SUnit *SU) override {
   3535     BottomQ.push(SU);
   3536   }
   3537 };
   3538 
   3539 } // end anonymous namespace
   3540 
   3541 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
   3542   bool Alternate = !ForceTopDown && !ForceBottomUp;
   3543   bool TopDown = !ForceBottomUp;
   3544   assert((TopDown || !ForceTopDown) &&
   3545          "-misched-topdown incompatible with -misched-bottomup");
   3546   return new ScheduleDAGMILive(
   3547       C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
   3548 }
   3549 
   3550 static MachineSchedRegistry ShufflerRegistry(
   3551   "shuffle", "Shuffle machine instructions alternating directions",
   3552   createInstructionShuffler);
   3553 #endif // !NDEBUG
   3554 
   3555 //===----------------------------------------------------------------------===//
   3556 // GraphWriter support for ScheduleDAGMILive.
   3557 //===----------------------------------------------------------------------===//
   3558 
   3559 #ifndef NDEBUG
   3560 namespace llvm {
   3561 
   3562 template<> struct GraphTraits<
   3563   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
   3564 
   3565 template<>
   3566 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
   3567   DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
   3568 
   3569   static std::string getGraphName(const ScheduleDAG *G) {
   3570     return G->MF.getName();
   3571   }
   3572 
   3573   static bool renderGraphFromBottomUp() {
   3574     return true;
   3575   }
   3576 
   3577   static bool isNodeHidden(const SUnit *Node) {
   3578     if (ViewMISchedCutoff == 0)
   3579       return false;
   3580     return (Node->Preds.size() > ViewMISchedCutoff
   3581          || Node->Succs.size() > ViewMISchedCutoff);
   3582   }
   3583 
   3584   /// If you want to override the dot attributes printed for a particular
   3585   /// edge, override this method.
   3586   static std::string getEdgeAttributes(const SUnit *Node,
   3587                                        SUnitIterator EI,
   3588                                        const ScheduleDAG *Graph) {
   3589     if (EI.isArtificialDep())
   3590       return "color=cyan,style=dashed";
   3591     if (EI.isCtrlDep())
   3592       return "color=blue,style=dashed";
   3593     return "";
   3594   }
   3595 
   3596   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
   3597     std::string Str;
   3598     raw_string_ostream SS(Str);
   3599     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
   3600     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
   3601       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
   3602     SS << "SU:" << SU->NodeNum;
   3603     if (DFS)
   3604       SS << " I:" << DFS->getNumInstrs(SU);
   3605     return SS.str();
   3606   }
   3607 
   3608   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
   3609     return G->getGraphNodeLabel(SU);
   3610   }
   3611 
   3612   static std::string getNodeAttributes(const SUnit *N, const ScheduleDAG *G) {
   3613     std::string Str("shape=Mrecord");
   3614     const ScheduleDAGMI *DAG = static_cast<const ScheduleDAGMI*>(G);
   3615     const SchedDFSResult *DFS = DAG->hasVRegLiveness() ?
   3616       static_cast<const ScheduleDAGMILive*>(G)->getDFSResult() : nullptr;
   3617     if (DFS) {
   3618       Str += ",style=filled,fillcolor=\"#";
   3619       Str += DOT::getColorString(DFS->getSubtreeID(N));
   3620       Str += '"';
   3621     }
   3622     return Str;
   3623   }
   3624 };
   3625 
   3626 } // end namespace llvm
   3627 #endif // NDEBUG
   3628 
   3629 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
   3630 /// rendered using 'dot'.
   3631 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
   3632 #ifndef NDEBUG
   3633   ViewGraph(this, Name, false, Title);
   3634 #else
   3635   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
   3636          << "systems with Graphviz or gv!\n";
   3637 #endif  // NDEBUG
   3638 }
   3639 
   3640 /// Out-of-line implementation with no arguments is handy for gdb.
   3641 void ScheduleDAGMI::viewGraph() {
   3642   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
   3643 }
   3644