      1 //===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // MachineScheduler schedules machine instructions after phi elimination. It
     11 // preserves LiveIntervals so it can be invoked before register allocation.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "misched"
     16 
     17 #include "llvm/CodeGen/MachineScheduler.h"
     18 #include "llvm/ADT/OwningPtr.h"
     19 #include "llvm/ADT/PriorityQueue.h"
     20 #include "llvm/Analysis/AliasAnalysis.h"
     21 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
     22 #include "llvm/CodeGen/MachineDominators.h"
     23 #include "llvm/CodeGen/MachineLoopInfo.h"
     24 #include "llvm/CodeGen/MachineRegisterInfo.h"
     25 #include "llvm/CodeGen/Passes.h"
     26 #include "llvm/CodeGen/RegisterClassInfo.h"
     27 #include "llvm/CodeGen/ScheduleDFS.h"
     28 #include "llvm/CodeGen/ScheduleHazardRecognizer.h"
     29 #include "llvm/Support/CommandLine.h"
     30 #include "llvm/Support/Debug.h"
     31 #include "llvm/Support/ErrorHandling.h"
     32 #include "llvm/Support/GraphWriter.h"
     33 #include "llvm/Support/raw_ostream.h"
     34 #include "llvm/Target/TargetInstrInfo.h"
     35 #include <queue>
     36 
     37 using namespace llvm;
     38 
     39 namespace llvm {
     40 cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
     41                            cl::desc("Force top-down list scheduling"));
     42 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
     43                             cl::desc("Force bottom-up list scheduling"));
     44 }
     45 
     46 #ifndef NDEBUG
     47 static cl::opt<bool> ViewMISchedDAGs("view-misched-dags", cl::Hidden,
     48   cl::desc("Pop up a window to show MISched dags after they are processed"));
     49 
     50 static cl::opt<unsigned> MISchedCutoff("misched-cutoff", cl::Hidden,
     51   cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
     52 #else
     53 static bool ViewMISchedDAGs = false;
     54 #endif // NDEBUG
     55 
     56 static cl::opt<bool> EnableLoadCluster("misched-cluster", cl::Hidden,
     57   cl::desc("Enable load clustering."), cl::init(true));
     58 
     59 // Experimental heuristics
     60 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
     61   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
     62 
     63 static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
     64   cl::desc("Verify machine instrs before and after machine scheduling"));
     65 
     66 // DAG subtrees must have at least this many nodes.
     67 static const unsigned MinSubtreeSize = 8;
     68 
     69 //===----------------------------------------------------------------------===//
     70 // Machine Instruction Scheduling Pass and Registry
     71 //===----------------------------------------------------------------------===//
     72 
     73 MachineSchedContext::MachineSchedContext():
     74     MF(0), MLI(0), MDT(0), PassConfig(0), AA(0), LIS(0) {
     75   RegClassInfo = new RegisterClassInfo();
     76 }
     77 
     78 MachineSchedContext::~MachineSchedContext() {
     79   delete RegClassInfo;
     80 }
     81 
     82 namespace {
     83 /// MachineScheduler runs after coalescing and before register allocation.
     84 class MachineScheduler : public MachineSchedContext,
     85                          public MachineFunctionPass {
     86 public:
     87   MachineScheduler();
     88 
     89   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
     90 
     91   virtual void releaseMemory() {}
     92 
     93   virtual bool runOnMachineFunction(MachineFunction&);
     94 
     95   virtual void print(raw_ostream &O, const Module* = 0) const;
     96 
     97   static char ID; // Class identification, replacement for typeinfo
     98 };
     99 } // namespace
    100 
    101 char MachineScheduler::ID = 0;
    102 
    103 char &llvm::MachineSchedulerID = MachineScheduler::ID;
    104 
    105 INITIALIZE_PASS_BEGIN(MachineScheduler, "misched",
    106                       "Machine Instruction Scheduler", false, false)
    107 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
    108 INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
    109 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
    110 INITIALIZE_PASS_END(MachineScheduler, "misched",
    111                     "Machine Instruction Scheduler", false, false)
    112 
    113 MachineScheduler::MachineScheduler()
    114 : MachineFunctionPass(ID) {
    115   initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
    116 }
    117 
    118 void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
    119   AU.setPreservesCFG();
    120   AU.addRequiredID(MachineDominatorsID);
    121   AU.addRequired<MachineLoopInfo>();
    122   AU.addRequired<AliasAnalysis>();
    123   AU.addRequired<TargetPassConfig>();
    124   AU.addRequired<SlotIndexes>();
    125   AU.addPreserved<SlotIndexes>();
    126   AU.addRequired<LiveIntervals>();
    127   AU.addPreserved<LiveIntervals>();
    128   MachineFunctionPass::getAnalysisUsage(AU);
    129 }
    130 
    131 MachinePassRegistry MachineSchedRegistry::Registry;
    132 
    133 /// A dummy default scheduler factory indicates whether the scheduler
    134 /// is overridden on the command line.
    135 static ScheduleDAGInstrs *useDefaultMachineSched(MachineSchedContext *C) {
    136   return 0;
    137 }
    138 
    139 /// MachineSchedOpt allows command line selection of the scheduler.
    140 static cl::opt<MachineSchedRegistry::ScheduleDAGCtor, false,
    141                RegisterPassParser<MachineSchedRegistry> >
    142 MachineSchedOpt("misched",
    143                 cl::init(&useDefaultMachineSched), cl::Hidden,
    144                 cl::desc("Machine instruction scheduler to use"));
    145 
    146 static MachineSchedRegistry
    147 DefaultSchedRegistry("default", "Use the target's default scheduler choice.",
    148                      useDefaultMachineSched);
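
         // Illustrative sketch (not from this file): a target or plugin can register an
         // alternative scheduler through the same registry machinery used above. The
         // factory name and body below are hypothetical; only the registration pattern
         // follows what this file does for the default converging scheduler.
         //
         //   static ScheduleDAGInstrs *createMyCustomSched(MachineSchedContext *C) {
         //     return new ScheduleDAGMI(C, new ConvergingScheduler());
         //   }
         //   static MachineSchedRegistry
         //   MySchedRegistry("my-custom", "Run my custom machine scheduler.",
         //                   createMyCustomSched);
         //
         // Such a scheduler can then be selected with -misched=my-custom, or made the
         // target default via MachineSchedRegistry::setDefault().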
    149 
    150 /// Forward declare the standard machine scheduler. This will be used as the
    151 /// default scheduler if the target does not set a default.
    152 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C);
    153 
    154 
    155 /// Decrement this iterator until reaching the top or a non-debug instr.
    156 static MachineBasicBlock::iterator
    157 priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
    158   assert(I != Beg && "reached the top of the region, cannot decrement");
    159   while (--I != Beg) {
    160     if (!I->isDebugValue())
    161       break;
    162   }
    163   return I;
    164 }
    165 
    166 /// If this iterator is a debug value, increment until reaching the End or a
    167 /// non-debug instruction.
    168 static MachineBasicBlock::iterator
    169 nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
    170   for(; I != End; ++I) {
    171     if (!I->isDebugValue())
    172       break;
    173   }
    174   return I;
    175 }
    176 
    177 /// Top-level MachineScheduler pass driver.
    178 ///
    179 /// Visit blocks in function order. Divide each block into scheduling regions
    180 /// and visit them bottom-up. Visiting regions bottom-up is not required, but is
    181 /// consistent with the DAG builder, which traverses the interior of the
    182 /// scheduling regions bottom-up.
    183 ///
    184 /// This design avoids exposing scheduling boundaries to the DAG builder,
    185 /// simplifying the DAG builder's support for "special" target instructions.
    186 /// At the same time the design allows target schedulers to operate across
      187 /// scheduling boundaries, for example to bundle the boundary instructions
    188 /// without reordering them. This creates complexity, because the target
    189 /// scheduler must update the RegionBegin and RegionEnd positions cached by
    190 /// ScheduleDAGInstrs whenever adding or removing instructions. A much simpler
    191 /// design would be to split blocks at scheduling boundaries, but LLVM has a
    192 /// general bias against block splitting purely for implementation simplicity.
    193 bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
     194   DEBUG(dbgs() << "Before MISched:\n"; mf.print(dbgs()));
    195 
    196   // Initialize the context of the pass.
    197   MF = &mf;
    198   MLI = &getAnalysis<MachineLoopInfo>();
    199   MDT = &getAnalysis<MachineDominatorTree>();
    200   PassConfig = &getAnalysis<TargetPassConfig>();
    201   AA = &getAnalysis<AliasAnalysis>();
    202 
    203   LIS = &getAnalysis<LiveIntervals>();
    204   const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
    205 
    206   if (VerifyScheduling) {
    207     DEBUG(LIS->dump());
    208     MF->verify(this, "Before machine scheduling.");
    209   }
    210   RegClassInfo->runOnMachineFunction(*MF);
    211 
    212   // Select the scheduler, or set the default.
    213   MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt;
    214   if (Ctor == useDefaultMachineSched) {
    215     // Get the default scheduler set by the target.
    216     Ctor = MachineSchedRegistry::getDefault();
    217     if (!Ctor) {
    218       Ctor = createConvergingSched;
    219       MachineSchedRegistry::setDefault(Ctor);
    220     }
    221   }
    222   // Instantiate the selected scheduler.
    223   OwningPtr<ScheduleDAGInstrs> Scheduler(Ctor(this));
    224 
    225   // Visit all machine basic blocks.
    226   //
    227   // TODO: Visit blocks in global postorder or postorder within the bottom-up
    228   // loop tree. Then we can optionally compute global RegPressure.
    229   for (MachineFunction::iterator MBB = MF->begin(), MBBEnd = MF->end();
    230        MBB != MBBEnd; ++MBB) {
    231 
    232     Scheduler->startBlock(MBB);
    233 
    234     // Break the block into scheduling regions [I, RegionEnd), and schedule each
     235     // region as soon as it is discovered. RegionEnd points to the scheduling
    236     // boundary at the bottom of the region. The DAG does not include RegionEnd,
    237     // but the region does (i.e. the next RegionEnd is above the previous
    238     // RegionBegin). If the current block has no terminator then RegionEnd ==
    239     // MBB->end() for the bottom region.
    240     //
    241     // The Scheduler may insert instructions during either schedule() or
    242     // exitRegion(), even for empty regions. So the local iterators 'I' and
    243     // 'RegionEnd' are invalid across these calls.
    244     unsigned RemainingInstrs = MBB->size();
    245     for(MachineBasicBlock::iterator RegionEnd = MBB->end();
    246         RegionEnd != MBB->begin(); RegionEnd = Scheduler->begin()) {
    247 
    248       // Avoid decrementing RegionEnd for blocks with no terminator.
    249       if (RegionEnd != MBB->end()
    250           || TII->isSchedulingBoundary(llvm::prior(RegionEnd), MBB, *MF)) {
    251         --RegionEnd;
    252         // Count the boundary instruction.
    253         --RemainingInstrs;
    254       }
    255 
    256       // The next region starts above the previous region. Look backward in the
    257       // instruction stream until we find the nearest boundary.
    258       MachineBasicBlock::iterator I = RegionEnd;
    259       for(;I != MBB->begin(); --I, --RemainingInstrs) {
    260         if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF))
    261           break;
    262       }
    263       // Notify the scheduler of the region, even if we may skip scheduling
    264       // it. Perhaps it still needs to be bundled.
    265       Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs);
    266 
    267       // Skip empty scheduling regions (0 or 1 schedulable instructions).
    268       if (I == RegionEnd || I == llvm::prior(RegionEnd)) {
    269         // Close the current region. Bundle the terminator if needed.
    270         // This invalidates 'RegionEnd' and 'I'.
    271         Scheduler->exitRegion();
    272         continue;
    273       }
    274       DEBUG(dbgs() << "********** MI Scheduling **********\n");
    275       DEBUG(dbgs() << MF->getName()
    276             << ":BB#" << MBB->getNumber() << " " << MBB->getName()
    277             << "\n  From: " << *I << "    To: ";
    278             if (RegionEnd != MBB->end()) dbgs() << *RegionEnd;
    279             else dbgs() << "End";
    280             dbgs() << " Remaining: " << RemainingInstrs << "\n");
    281 
    282       // Schedule a region: possibly reorder instructions.
    283       // This invalidates 'RegionEnd' and 'I'.
    284       Scheduler->schedule();
    285 
    286       // Close the current region.
    287       Scheduler->exitRegion();
    288 
    289       // Scheduling has invalidated the current iterator 'I'. Ask the
     290       // scheduler for the top of its scheduled region.
    291       RegionEnd = Scheduler->begin();
    292     }
    293     assert(RemainingInstrs == 0 && "Instruction count mismatch!");
    294     Scheduler->finishBlock();
    295   }
    296   Scheduler->finalizeSchedule();
    297   DEBUG(LIS->dump());
    298   if (VerifyScheduling)
    299     MF->verify(this, "After machine scheduling.");
    300   return true;
    301 }
    302 
    303 void MachineScheduler::print(raw_ostream &O, const Module* m) const {
    304   // unimplemented
    305 }
    306 
    307 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    308 void ReadyQueue::dump() {
    309   dbgs() << Name << ": ";
    310   for (unsigned i = 0, e = Queue.size(); i < e; ++i)
    311     dbgs() << Queue[i]->NodeNum << " ";
    312   dbgs() << "\n";
    313 }
    314 #endif
    315 
    316 //===----------------------------------------------------------------------===//
    317 // ScheduleDAGMI - Base class for MachineInstr scheduling with LiveIntervals
    318 // preservation.
    319 //===----------------------------------------------------------------------===//
    320 
    321 ScheduleDAGMI::~ScheduleDAGMI() {
    322   delete DFSResult;
    323   DeleteContainerPointers(Mutations);
    324   delete SchedImpl;
    325 }
    326 
    327 bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
    328   return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
    329 }
    330 
    331 bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
    332   if (SuccSU != &ExitSU) {
    333     // Do not use WillCreateCycle, it assumes SD scheduling.
    334     // If Pred is reachable from Succ, then the edge creates a cycle.
    335     if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
    336       return false;
    337     Topo.AddPred(SuccSU, PredDep.getSUnit());
    338   }
    339   SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
    340   // Return true regardless of whether a new edge needed to be inserted.
    341   return true;
    342 }
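
         // For example, a DAG mutation that wants SUb scheduled next to SUa can request
         // a weak cluster edge with the call pattern below (a sketch of how the
         // mutations later in this file use canAddEdge/addEdge; SUa and SUb are
         // placeholders):
         //
         //   if (DAG->canAddEdge(SUb, SUa))
         //     DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));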
    343 
    344 /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
    345 /// NumPredsLeft reaches zero, release the successor node.
    346 ///
    347 /// FIXME: Adjust SuccSU height based on MinLatency.
    348 void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) {
    349   SUnit *SuccSU = SuccEdge->getSUnit();
    350 
    351   if (SuccEdge->isWeak()) {
    352     --SuccSU->WeakPredsLeft;
    353     if (SuccEdge->isCluster())
    354       NextClusterSucc = SuccSU;
    355     return;
    356   }
    357 #ifndef NDEBUG
    358   if (SuccSU->NumPredsLeft == 0) {
    359     dbgs() << "*** Scheduling failed! ***\n";
    360     SuccSU->dump(this);
    361     dbgs() << " has been released too many times!\n";
    362     llvm_unreachable(0);
    363   }
    364 #endif
    365   --SuccSU->NumPredsLeft;
    366   if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
    367     SchedImpl->releaseTopNode(SuccSU);
    368 }
    369 
    370 /// releaseSuccessors - Call releaseSucc on each of SU's successors.
    371 void ScheduleDAGMI::releaseSuccessors(SUnit *SU) {
    372   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
    373        I != E; ++I) {
    374     releaseSucc(SU, &*I);
    375   }
    376 }
    377 
    378 /// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
    379 /// NumSuccsLeft reaches zero, release the predecessor node.
    380 ///
    381 /// FIXME: Adjust PredSU height based on MinLatency.
    382 void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) {
    383   SUnit *PredSU = PredEdge->getSUnit();
    384 
    385   if (PredEdge->isWeak()) {
    386     --PredSU->WeakSuccsLeft;
    387     if (PredEdge->isCluster())
    388       NextClusterPred = PredSU;
    389     return;
    390   }
    391 #ifndef NDEBUG
    392   if (PredSU->NumSuccsLeft == 0) {
    393     dbgs() << "*** Scheduling failed! ***\n";
    394     PredSU->dump(this);
    395     dbgs() << " has been released too many times!\n";
    396     llvm_unreachable(0);
    397   }
    398 #endif
    399   --PredSU->NumSuccsLeft;
    400   if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
    401     SchedImpl->releaseBottomNode(PredSU);
    402 }
    403 
    404 /// releasePredecessors - Call releasePred on each of SU's predecessors.
    405 void ScheduleDAGMI::releasePredecessors(SUnit *SU) {
    406   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
    407        I != E; ++I) {
    408     releasePred(SU, &*I);
    409   }
    410 }
    411 
    412 /// This is normally called from the main scheduler loop but may also be invoked
    413 /// by the scheduling strategy to perform additional code motion.
    414 void ScheduleDAGMI::moveInstruction(MachineInstr *MI,
    415                                     MachineBasicBlock::iterator InsertPos) {
    416   // Advance RegionBegin if the first instruction moves down.
    417   if (&*RegionBegin == MI)
    418     ++RegionBegin;
    419 
    420   // Update the instruction stream.
    421   BB->splice(InsertPos, BB, MI);
    422 
    423   // Update LiveIntervals
    424   LIS->handleMove(MI, /*UpdateFlags=*/true);
    425 
    426   // Recede RegionBegin if an instruction moves above the first.
    427   if (RegionBegin == InsertPos)
    428     RegionBegin = MI;
    429 }
    430 
    431 bool ScheduleDAGMI::checkSchedLimit() {
    432 #ifndef NDEBUG
    433   if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
    434     CurrentTop = CurrentBottom;
    435     return false;
    436   }
    437   ++NumInstrsScheduled;
    438 #endif
    439   return true;
    440 }
    441 
    442 /// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
    443 /// crossing a scheduling boundary. [begin, end) includes all instructions in
    444 /// the region, including the boundary itself and single-instruction regions
    445 /// that don't get scheduled.
    446 void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb,
    447                                 MachineBasicBlock::iterator begin,
    448                                 MachineBasicBlock::iterator end,
    449                                 unsigned endcount)
    450 {
    451   ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
    452 
    453   // For convenience remember the end of the liveness region.
    454   LiveRegionEnd =
    455     (RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
    456 }
    457 
     458 // Set up the register pressure trackers for the top-scheduled and
     459 // bottom-scheduled regions.
    460 void ScheduleDAGMI::initRegPressure() {
    461   TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
    462   BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
    463 
    464   // Close the RPTracker to finalize live ins.
    465   RPTracker.closeRegion();
    466 
    467   DEBUG(RPTracker.dump());
    468 
    469   // Initialize the live ins and live outs.
    470   TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
    471   BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
    472 
    473   // Close one end of the tracker so we can call
    474   // getMaxUpward/DownwardPressureDelta before advancing across any
    475   // instructions. This converts currently live regs into live ins/outs.
    476   TopRPTracker.closeTop();
    477   BotRPTracker.closeBottom();
    478 
    479   BotRPTracker.initLiveThru(RPTracker);
    480   if (!BotRPTracker.getLiveThru().empty()) {
    481     TopRPTracker.initLiveThru(BotRPTracker.getLiveThru());
    482     DEBUG(dbgs() << "Live Thru: ";
    483           dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI));
     484   }
    485 
    486   // Account for liveness generated by the region boundary.
    487   if (LiveRegionEnd != RegionEnd)
    488     BotRPTracker.recede();
    489 
    490   assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
    491 
    492   // Cache the list of excess pressure sets in this region. This will also track
    493   // the max pressure in the scheduled code for these sets.
    494   RegionCriticalPSets.clear();
    495   const std::vector<unsigned> &RegionPressure =
    496     RPTracker.getPressure().MaxSetPressure;
    497   for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
    498     unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
    499     if (RegionPressure[i] > Limit) {
    500       DEBUG(dbgs() << TRI->getRegPressureSetName(i)
    501             << " Limit " << Limit
    502             << " Actual " << RegionPressure[i] << "\n");
    503       RegionCriticalPSets.push_back(PressureElement(i, 0));
    504     }
    505   }
    506   DEBUG(dbgs() << "Excess PSets: ";
    507         for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
    508           dbgs() << TRI->getRegPressureSetName(
    509             RegionCriticalPSets[i].PSetID) << " ";
    510         dbgs() << "\n");
    511 }
    512 
    513 // FIXME: When the pressure tracker deals in pressure differences then we won't
    514 // iterate over all RegionCriticalPSets[i].
    515 void ScheduleDAGMI::
    516 updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) {
    517   for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
    518     unsigned ID = RegionCriticalPSets[i].PSetID;
    519     int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
    520     if ((int)NewMaxPressure[ID] > MaxUnits)
    521       MaxUnits = NewMaxPressure[ID];
    522   }
    523   DEBUG(
    524     for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) {
    525       unsigned Limit = RegClassInfo->getRegPressureSetLimit(i);
     526       if (NewMaxPressure[i] > Limit) {
    527         dbgs() << "  " << TRI->getRegPressureSetName(i) << ": "
    528                << NewMaxPressure[i] << " > " << Limit << "\n";
    529       }
    530     });
    531 }
    532 
    533 /// schedule - Called back from MachineScheduler::runOnMachineFunction
    534 /// after setting up the current scheduling region. [RegionBegin, RegionEnd)
    535 /// only includes instructions that have DAG nodes, not scheduling boundaries.
    536 ///
    537 /// This is a skeletal driver, with all the functionality pushed into helpers,
     538 /// so that it can be easily extended by experimental schedulers. Generally,
    539 /// implementing MachineSchedStrategy should be sufficient to implement a new
    540 /// scheduling algorithm. However, if a scheduler further subclasses
    541 /// ScheduleDAGMI then it will want to override this virtual method in order to
    542 /// update any specialized state.
    543 void ScheduleDAGMI::schedule() {
    544   buildDAGWithRegPressure();
    545 
    546   Topo.InitDAGTopologicalSorting();
    547 
    548   postprocessDAG();
    549 
    550   SmallVector<SUnit*, 8> TopRoots, BotRoots;
    551   findRootsAndBiasEdges(TopRoots, BotRoots);
    552 
    553   // Initialize the strategy before modifying the DAG.
    554   // This may initialize a DFSResult to be used for queue priority.
    555   SchedImpl->initialize(this);
    556 
    557   DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
    558           SUnits[su].dumpAll(this));
    559   if (ViewMISchedDAGs) viewGraph();
    560 
    561   // Initialize ready queues now that the DAG and priority data are finalized.
    562   initQueues(TopRoots, BotRoots);
    563 
    564   bool IsTopNode = false;
    565   while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
    566     assert(!SU->isScheduled && "Node already scheduled");
    567     if (!checkSchedLimit())
    568       break;
    569 
    570     scheduleMI(SU, IsTopNode);
    571 
    572     updateQueues(SU, IsTopNode);
    573   }
    574   assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
    575 
    576   placeDebugValues();
    577 
    578   DEBUG({
    579       unsigned BBNum = begin()->getParent()->getNumber();
    580       dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
    581       dumpSchedule();
    582       dbgs() << '\n';
    583     });
    584 }
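
         // A minimal MachineSchedStrategy sketch (hypothetical, for illustration only):
         // it pops nodes from a single bottom-up ready list in the order they are
         // released. The class name is invented; the overridden hooks are exactly the
         // ones invoked by schedule(), initQueues() and updateQueues() in this file.
         //
         //   class TrivialBottomUpSched : public MachineSchedStrategy {
         //     std::vector<SUnit*> ReadyQ;
         //   public:
         //     virtual void initialize(ScheduleDAGMI *DAG) { ReadyQ.clear(); }
         //     virtual SUnit *pickNode(bool &IsTopNode) {
         //       if (ReadyQ.empty()) return NULL;
         //       IsTopNode = false;               // always schedule from the bottom
         //       SUnit *SU = ReadyQ.back();
         //       ReadyQ.pop_back();
         //       return SU;
         //     }
         //     virtual void schedNode(SUnit *SU, bool IsTopNode) {}
         //     virtual void releaseTopNode(SUnit *SU) {}
         //     virtual void releaseBottomNode(SUnit *SU) { ReadyQ.push_back(SU); }
         //   };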
    585 
     586 /// Build the DAG and set up three register pressure trackers.
    587 void ScheduleDAGMI::buildDAGWithRegPressure() {
    588   // Initialize the register pressure tracker used by buildSchedGraph.
    589   RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
    590                  /*TrackUntiedDefs=*/true);
    591 
     592   // Account for liveness generated by the region boundary.
    593   if (LiveRegionEnd != RegionEnd)
    594     RPTracker.recede();
    595 
    596   // Build the DAG, and compute current register pressure.
    597   buildSchedGraph(AA, &RPTracker);
    598 
    599   // Initialize top/bottom trackers after computing region pressure.
    600   initRegPressure();
    601 }
    602 
    603 /// Apply each ScheduleDAGMutation step in order.
    604 void ScheduleDAGMI::postprocessDAG() {
    605   for (unsigned i = 0, e = Mutations.size(); i < e; ++i) {
    606     Mutations[i]->apply(this);
    607   }
    608 }
    609 
    610 void ScheduleDAGMI::computeDFSResult() {
    611   if (!DFSResult)
     612     DFSResult = new SchedDFSResult(/*BottomUp*/true, MinSubtreeSize);
    613   DFSResult->clear();
    614   ScheduledTrees.clear();
    615   DFSResult->resize(SUnits.size());
    616   DFSResult->compute(SUnits);
    617   ScheduledTrees.resize(DFSResult->getNumSubtrees());
    618 }
    619 
    620 void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots,
    621                                           SmallVectorImpl<SUnit*> &BotRoots) {
    622   for (std::vector<SUnit>::iterator
    623          I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
    624     SUnit *SU = &(*I);
    625     assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits");
    626 
    627     // Order predecessors so DFSResult follows the critical path.
    628     SU->biasCriticalPath();
    629 
    630     // A SUnit is ready to top schedule if it has no predecessors.
    631     if (!I->NumPredsLeft)
    632       TopRoots.push_back(SU);
    633     // A SUnit is ready to bottom schedule if it has no successors.
    634     if (!I->NumSuccsLeft)
    635       BotRoots.push_back(SU);
    636   }
    637   ExitSU.biasCriticalPath();
    638 }
    639 
     640 /// Identify DAG roots and set up scheduler queues.
    641 void ScheduleDAGMI::initQueues(ArrayRef<SUnit*> TopRoots,
    642                                ArrayRef<SUnit*> BotRoots) {
    643   NextClusterSucc = NULL;
    644   NextClusterPred = NULL;
    645 
    646   // Release all DAG roots for scheduling, not including EntrySU/ExitSU.
    647   //
    648   // Nodes with unreleased weak edges can still be roots.
    649   // Release top roots in forward order.
    650   for (SmallVectorImpl<SUnit*>::const_iterator
    651          I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) {
    652     SchedImpl->releaseTopNode(*I);
    653   }
    654   // Release bottom roots in reverse order so the higher priority nodes appear
    655   // first. This is more natural and slightly more efficient.
    656   for (SmallVectorImpl<SUnit*>::const_reverse_iterator
    657          I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) {
    658     SchedImpl->releaseBottomNode(*I);
    659   }
    660 
    661   releaseSuccessors(&EntrySU);
    662   releasePredecessors(&ExitSU);
    663 
    664   SchedImpl->registerRoots();
    665 
    666   // Advance past initial DebugValues.
    667   assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
    668   CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
    669   TopRPTracker.setPos(CurrentTop);
    670 
    671   CurrentBottom = RegionEnd;
    672 }
    673 
    674 /// Move an instruction and update register pressure.
    675 void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) {
    676   // Move the instruction to its new location in the instruction stream.
    677   MachineInstr *MI = SU->getInstr();
    678 
    679   if (IsTopNode) {
    680     assert(SU->isTopReady() && "node still has unscheduled dependencies");
    681     if (&*CurrentTop == MI)
    682       CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
    683     else {
    684       moveInstruction(MI, CurrentTop);
    685       TopRPTracker.setPos(MI);
    686     }
    687 
    688     // Update top scheduled pressure.
    689     TopRPTracker.advance();
    690     assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
    691     updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
    692   }
    693   else {
    694     assert(SU->isBottomReady() && "node still has unscheduled dependencies");
    695     MachineBasicBlock::iterator priorII =
    696       priorNonDebug(CurrentBottom, CurrentTop);
    697     if (&*priorII == MI)
    698       CurrentBottom = priorII;
    699     else {
    700       if (&*CurrentTop == MI) {
    701         CurrentTop = nextIfDebug(++CurrentTop, priorII);
    702         TopRPTracker.setPos(CurrentTop);
    703       }
    704       moveInstruction(MI, CurrentBottom);
    705       CurrentBottom = MI;
    706     }
    707     // Update bottom scheduled pressure.
    708     BotRPTracker.recede();
    709     assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
    710     updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
    711   }
    712 }
    713 
    714 /// Update scheduler queues after scheduling an instruction.
    715 void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) {
    716   // Release dependent instructions for scheduling.
    717   if (IsTopNode)
    718     releaseSuccessors(SU);
    719   else
    720     releasePredecessors(SU);
    721 
    722   SU->isScheduled = true;
    723 
    724   if (DFSResult) {
    725     unsigned SubtreeID = DFSResult->getSubtreeID(SU);
    726     if (!ScheduledTrees.test(SubtreeID)) {
    727       ScheduledTrees.set(SubtreeID);
    728       DFSResult->scheduleTree(SubtreeID);
    729       SchedImpl->scheduleTree(SubtreeID);
    730     }
    731   }
    732 
    733   // Notify the scheduling strategy after updating the DAG.
    734   SchedImpl->schedNode(SU, IsTopNode);
    735 }
    736 
    737 /// Reinsert any remaining debug_values, just like the PostRA scheduler.
    738 void ScheduleDAGMI::placeDebugValues() {
    739   // If first instruction was a DBG_VALUE then put it back.
    740   if (FirstDbgValue) {
    741     BB->splice(RegionBegin, BB, FirstDbgValue);
    742     RegionBegin = FirstDbgValue;
    743   }
    744 
    745   for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
    746          DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
    747     std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
    748     MachineInstr *DbgValue = P.first;
    749     MachineBasicBlock::iterator OrigPrevMI = P.second;
    750     if (&*RegionBegin == DbgValue)
    751       ++RegionBegin;
    752     BB->splice(++OrigPrevMI, BB, DbgValue);
    753     if (OrigPrevMI == llvm::prior(RegionEnd))
    754       RegionEnd = DbgValue;
    755   }
    756   DbgValues.clear();
    757   FirstDbgValue = NULL;
    758 }
    759 
    760 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    761 void ScheduleDAGMI::dumpSchedule() const {
    762   for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) {
    763     if (SUnit *SU = getSUnit(&(*MI)))
    764       SU->dump(this);
    765     else
    766       dbgs() << "Missing SUnit\n";
    767   }
    768 }
    769 #endif
    770 
    771 //===----------------------------------------------------------------------===//
    772 // LoadClusterMutation - DAG post-processing to cluster loads.
    773 //===----------------------------------------------------------------------===//
    774 
    775 namespace {
    776 /// \brief Post-process the DAG to create cluster edges between neighboring
    777 /// loads.
    778 class LoadClusterMutation : public ScheduleDAGMutation {
    779   struct LoadInfo {
    780     SUnit *SU;
    781     unsigned BaseReg;
    782     unsigned Offset;
    783     LoadInfo(SUnit *su, unsigned reg, unsigned ofs)
    784       : SU(su), BaseReg(reg), Offset(ofs) {}
    785   };
    786   static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS,
    787                            const LoadClusterMutation::LoadInfo &RHS);
    788 
    789   const TargetInstrInfo *TII;
    790   const TargetRegisterInfo *TRI;
    791 public:
    792   LoadClusterMutation(const TargetInstrInfo *tii,
    793                       const TargetRegisterInfo *tri)
    794     : TII(tii), TRI(tri) {}
    795 
    796   virtual void apply(ScheduleDAGMI *DAG);
    797 protected:
    798   void clusterNeighboringLoads(ArrayRef<SUnit*> Loads, ScheduleDAGMI *DAG);
    799 };
    800 } // anonymous
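
         // A sketch of how a mutation like this is attached to the DAG as a
         // postprocessing step (the actual registration happens in the scheduler
         // factory, outside this excerpt, guarded by the -misched-cluster flag above):
         //
         //   if (EnableLoadCluster)
         //     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));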
    801 
    802 bool LoadClusterMutation::LoadInfoLess(
    803   const LoadClusterMutation::LoadInfo &LHS,
    804   const LoadClusterMutation::LoadInfo &RHS) {
    805   if (LHS.BaseReg != RHS.BaseReg)
    806     return LHS.BaseReg < RHS.BaseReg;
    807   return LHS.Offset < RHS.Offset;
    808 }
    809 
    810 void LoadClusterMutation::clusterNeighboringLoads(ArrayRef<SUnit*> Loads,
    811                                                   ScheduleDAGMI *DAG) {
    812   SmallVector<LoadClusterMutation::LoadInfo,32> LoadRecords;
    813   for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) {
    814     SUnit *SU = Loads[Idx];
    815     unsigned BaseReg;
    816     unsigned Offset;
    817     if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI))
    818       LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset));
    819   }
    820   if (LoadRecords.size() < 2)
    821     return;
    822   std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess);
    823   unsigned ClusterLength = 1;
    824   for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) {
    825     if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) {
    826       ClusterLength = 1;
    827       continue;
    828     }
    829 
    830     SUnit *SUa = LoadRecords[Idx].SU;
    831     SUnit *SUb = LoadRecords[Idx+1].SU;
    832     if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength)
    833         && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
    834 
    835       DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU("
    836             << SUb->NodeNum << ")\n");
    837       // Copy successor edges from SUa to SUb. Interleaving computation
    838       // dependent on SUa can prevent load combining due to register reuse.
    839       // Predecessor edges do not need to be copied from SUb to SUa since nearby
    840       // loads should have effectively the same inputs.
    841       for (SUnit::const_succ_iterator
    842              SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) {
    843         if (SI->getSUnit() == SUb)
    844           continue;
    845         DEBUG(dbgs() << "  Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n");
    846         DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial));
    847       }
    848       ++ClusterLength;
    849     }
    850     else
    851       ClusterLength = 1;
    852   }
    853 }
    854 
    855 /// \brief Callback from DAG postProcessing to create cluster edges for loads.
    856 void LoadClusterMutation::apply(ScheduleDAGMI *DAG) {
    857   // Map DAG NodeNum to store chain ID.
    858   DenseMap<unsigned, unsigned> StoreChainIDs;
    859   // Map each store chain to a set of dependent loads.
    860   SmallVector<SmallVector<SUnit*,4>, 32> StoreChainDependents;
    861   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
    862     SUnit *SU = &DAG->SUnits[Idx];
    863     if (!SU->getInstr()->mayLoad())
    864       continue;
    865     unsigned ChainPredID = DAG->SUnits.size();
    866     for (SUnit::const_pred_iterator
    867            PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
    868       if (PI->isCtrl()) {
    869         ChainPredID = PI->getSUnit()->NodeNum;
    870         break;
    871       }
    872     }
    873     // Check if this chain-like pred has been seen
    874     // before. ChainPredID==MaxNodeID for loads at the top of the schedule.
    875     unsigned NumChains = StoreChainDependents.size();
    876     std::pair<DenseMap<unsigned, unsigned>::iterator, bool> Result =
    877       StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains));
    878     if (Result.second)
    879       StoreChainDependents.resize(NumChains + 1);
    880     StoreChainDependents[Result.first->second].push_back(SU);
    881   }
    882   // Iterate over the store chains.
    883   for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx)
    884     clusterNeighboringLoads(StoreChainDependents[Idx], DAG);
    885 }
    886 
    887 //===----------------------------------------------------------------------===//
    888 // MacroFusion - DAG post-processing to encourage fusion of macro ops.
    889 //===----------------------------------------------------------------------===//
    890 
    891 namespace {
    892 /// \brief Post-process the DAG to create cluster edges between instructions
    893 /// that may be fused by the processor into a single operation.
    894 class MacroFusion : public ScheduleDAGMutation {
    895   const TargetInstrInfo *TII;
    896 public:
    897   MacroFusion(const TargetInstrInfo *tii): TII(tii) {}
    898 
    899   virtual void apply(ScheduleDAGMI *DAG);
    900 };
    901 } // anonymous
    902 
    903 /// \brief Callback from DAG postProcessing to create cluster edges to encourage
    904 /// fused operations.
    905 void MacroFusion::apply(ScheduleDAGMI *DAG) {
    906   // For now, assume targets can only fuse with the branch.
    907   MachineInstr *Branch = DAG->ExitSU.getInstr();
    908   if (!Branch)
    909     return;
    910 
    911   for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) {
    912     SUnit *SU = &DAG->SUnits[--Idx];
    913     if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch))
    914       continue;
    915 
    916     // Create a single weak edge from SU to ExitSU. The only effect is to cause
    917     // bottom-up scheduling to heavily prioritize the clustered SU.  There is no
    918     // need to copy predecessor edges from ExitSU to SU, since top-down
    919     // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling
    920     // of SU, we could create an artificial edge from the deepest root, but it
    921     // hasn't been needed yet.
    922     bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster));
    923     (void)Success;
    924     assert(Success && "No DAG nodes should be reachable from ExitSU");
    925 
    926     DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n");
    927     break;
    928   }
    929 }
    930 
    931 //===----------------------------------------------------------------------===//
    932 // CopyConstrain - DAG post-processing to encourage copy elimination.
    933 //===----------------------------------------------------------------------===//
    934 
    935 namespace {
    936 /// \brief Post-process the DAG to create weak edges from all uses of a copy to
    937 /// the one use that defines the copy's source vreg, most likely an induction
    938 /// variable increment.
    939 class CopyConstrain : public ScheduleDAGMutation {
    940   // Transient state.
    941   SlotIndex RegionBeginIdx;
    942   // RegionEndIdx is the slot index of the last non-debug instruction in the
    943   // scheduling region. So we may have RegionBeginIdx == RegionEndIdx.
    944   SlotIndex RegionEndIdx;
    945 public:
    946   CopyConstrain(const TargetInstrInfo *, const TargetRegisterInfo *) {}
    947 
    948   virtual void apply(ScheduleDAGMI *DAG);
    949 
    950 protected:
    951   void constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG);
    952 };
    953 } // anonymous
    954 
    955 /// constrainLocalCopy handles two possibilities:
    956 /// 1) Local src:
    957 /// I0:     = dst
    958 /// I1: src = ...
    959 /// I2:     = dst
    960 /// I3: dst = src (copy)
    961 /// (create pred->succ edges I0->I1, I2->I1)
    962 ///
    963 /// 2) Local copy:
    964 /// I0: dst = src (copy)
    965 /// I1:     = dst
    966 /// I2: src = ...
    967 /// I3:     = dst
    968 /// (create pred->succ edges I1->I2, I3->I2)
    969 ///
    970 /// Although the MachineScheduler is currently constrained to single blocks,
    971 /// this algorithm should handle extended blocks. An EBB is a set of
    972 /// contiguously numbered blocks such that the previous block in the EBB is
    973 /// always the single predecessor.
    974 void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) {
    975   LiveIntervals *LIS = DAG->getLIS();
    976   MachineInstr *Copy = CopySU->getInstr();
    977 
    978   // Check for pure vreg copies.
    979   unsigned SrcReg = Copy->getOperand(1).getReg();
    980   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    981     return;
    982 
    983   unsigned DstReg = Copy->getOperand(0).getReg();
    984   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
    985     return;
    986 
    987   // Check if either the dest or source is local. If it's live across a back
    988   // edge, it's not local. Note that if both vregs are live across the back
     989   // edge, we cannot successfully constrain the copy without cyclic scheduling.
    990   unsigned LocalReg = DstReg;
    991   unsigned GlobalReg = SrcReg;
    992   LiveInterval *LocalLI = &LIS->getInterval(LocalReg);
    993   if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx)) {
    994     LocalReg = SrcReg;
    995     GlobalReg = DstReg;
    996     LocalLI = &LIS->getInterval(LocalReg);
    997     if (!LocalLI->isLocal(RegionBeginIdx, RegionEndIdx))
    998       return;
    999   }
   1000   LiveInterval *GlobalLI = &LIS->getInterval(GlobalReg);
   1001 
   1002   // Find the global segment after the start of the local LI.
   1003   LiveInterval::iterator GlobalSegment = GlobalLI->find(LocalLI->beginIndex());
   1004   // If GlobalLI does not overlap LocalLI->start, then a copy directly feeds a
   1005   // local live range. We could create edges from other global uses to the local
   1006   // start, but the coalescer should have already eliminated these cases, so
   1007   // don't bother dealing with it.
   1008   if (GlobalSegment == GlobalLI->end())
   1009     return;
   1010 
   1011   // If GlobalSegment is killed at the LocalLI->start, the call to find()
   1012   // returned the next global segment. But if GlobalSegment overlaps with
    1013   // LocalLI->start, then advance to the next segment. If a hole in GlobalLI
   1014   // exists in LocalLI's vicinity, GlobalSegment will be the end of the hole.
   1015   if (GlobalSegment->contains(LocalLI->beginIndex()))
   1016     ++GlobalSegment;
   1017 
   1018   if (GlobalSegment == GlobalLI->end())
   1019     return;
   1020 
   1021   // Check if GlobalLI contains a hole in the vicinity of LocalLI.
   1022   if (GlobalSegment != GlobalLI->begin()) {
   1023     // Two address defs have no hole.
   1024     if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->end,
   1025                                GlobalSegment->start)) {
   1026       return;
   1027     }
   1028     // If the prior global segment may be defined by the same two-address
    1029     // instruction that also defines LocalLI, then we cannot make a hole here.
   1030     if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start,
   1031                                LocalLI->beginIndex())) {
   1032       return;
   1033     }
   1034     // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise
   1035     // it would be a disconnected component in the live range.
   1036     assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() &&
   1037            "Disconnected LRG within the scheduling region.");
   1038   }
   1039   MachineInstr *GlobalDef = LIS->getInstructionFromIndex(GlobalSegment->start);
   1040   if (!GlobalDef)
   1041     return;
   1042 
   1043   SUnit *GlobalSU = DAG->getSUnit(GlobalDef);
   1044   if (!GlobalSU)
   1045     return;
   1046 
   1047   // GlobalDef is the bottom of the GlobalLI hole. Open the hole by
   1048   // constraining the uses of the last local def to precede GlobalDef.
   1049   SmallVector<SUnit*,8> LocalUses;
   1050   const VNInfo *LastLocalVN = LocalLI->getVNInfoBefore(LocalLI->endIndex());
   1051   MachineInstr *LastLocalDef = LIS->getInstructionFromIndex(LastLocalVN->def);
   1052   SUnit *LastLocalSU = DAG->getSUnit(LastLocalDef);
   1053   for (SUnit::const_succ_iterator
   1054          I = LastLocalSU->Succs.begin(), E = LastLocalSU->Succs.end();
   1055        I != E; ++I) {
   1056     if (I->getKind() != SDep::Data || I->getReg() != LocalReg)
   1057       continue;
   1058     if (I->getSUnit() == GlobalSU)
   1059       continue;
   1060     if (!DAG->canAddEdge(GlobalSU, I->getSUnit()))
   1061       return;
   1062     LocalUses.push_back(I->getSUnit());
   1063   }
   1064   // Open the top of the GlobalLI hole by constraining any earlier global uses
   1065   // to precede the start of LocalLI.
   1066   SmallVector<SUnit*,8> GlobalUses;
   1067   MachineInstr *FirstLocalDef =
   1068     LIS->getInstructionFromIndex(LocalLI->beginIndex());
   1069   SUnit *FirstLocalSU = DAG->getSUnit(FirstLocalDef);
   1070   for (SUnit::const_pred_iterator
   1071          I = GlobalSU->Preds.begin(), E = GlobalSU->Preds.end(); I != E; ++I) {
   1072     if (I->getKind() != SDep::Anti || I->getReg() != GlobalReg)
   1073       continue;
   1074     if (I->getSUnit() == FirstLocalSU)
   1075       continue;
   1076     if (!DAG->canAddEdge(FirstLocalSU, I->getSUnit()))
   1077       return;
   1078     GlobalUses.push_back(I->getSUnit());
   1079   }
   1080   DEBUG(dbgs() << "Constraining copy SU(" << CopySU->NodeNum << ")\n");
   1081   // Add the weak edges.
   1082   for (SmallVectorImpl<SUnit*>::const_iterator
   1083          I = LocalUses.begin(), E = LocalUses.end(); I != E; ++I) {
   1084     DEBUG(dbgs() << "  Local use SU(" << (*I)->NodeNum << ") -> SU("
   1085           << GlobalSU->NodeNum << ")\n");
   1086     DAG->addEdge(GlobalSU, SDep(*I, SDep::Weak));
   1087   }
   1088   for (SmallVectorImpl<SUnit*>::const_iterator
   1089          I = GlobalUses.begin(), E = GlobalUses.end(); I != E; ++I) {
   1090     DEBUG(dbgs() << "  Global use SU(" << (*I)->NodeNum << ") -> SU("
   1091           << FirstLocalSU->NodeNum << ")\n");
   1092     DAG->addEdge(FirstLocalSU, SDep(*I, SDep::Weak));
   1093   }
   1094 }
   1095 
   1096 /// \brief Callback from DAG postProcessing to create weak edges to encourage
   1097 /// copy elimination.
   1098 void CopyConstrain::apply(ScheduleDAGMI *DAG) {
   1099   MachineBasicBlock::iterator FirstPos = nextIfDebug(DAG->begin(), DAG->end());
   1100   if (FirstPos == DAG->end())
   1101     return;
   1102   RegionBeginIdx = DAG->getLIS()->getInstructionIndex(&*FirstPos);
   1103   RegionEndIdx = DAG->getLIS()->getInstructionIndex(
   1104     &*priorNonDebug(DAG->end(), DAG->begin()));
   1105 
   1106   for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) {
   1107     SUnit *SU = &DAG->SUnits[Idx];
   1108     if (!SU->getInstr()->isCopy())
   1109       continue;
   1110 
   1111     constrainLocalCopy(SU, DAG);
   1112   }
   1113 }
   1114 
   1115 //===----------------------------------------------------------------------===//
   1116 // ConvergingScheduler - Implementation of the generic MachineSchedStrategy.
   1117 //===----------------------------------------------------------------------===//
   1118 
   1119 namespace {
   1120 /// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance
   1121 /// the schedule.
   1122 class ConvergingScheduler : public MachineSchedStrategy {
   1123 public:
   1124   /// Represent the type of SchedCandidate found within a single queue.
   1125   /// pickNodeBidirectional depends on these listed by decreasing priority.
   1126   enum CandReason {
   1127     NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax,
   1128     ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce,
   1129     TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder};
   1130 
   1131 #ifndef NDEBUG
   1132   static const char *getReasonStr(ConvergingScheduler::CandReason Reason);
   1133 #endif
   1134 
   1135   /// Policy for scheduling the next instruction in the candidate's zone.
   1136   struct CandPolicy {
   1137     bool ReduceLatency;
   1138     unsigned ReduceResIdx;
   1139     unsigned DemandResIdx;
   1140 
   1141     CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {}
   1142   };
   1143 
   1144   /// Status of an instruction's critical resource consumption.
   1145   struct SchedResourceDelta {
   1146     // Count critical resources in the scheduled region required by SU.
   1147     unsigned CritResources;
   1148 
   1149     // Count critical resources from another region consumed by SU.
   1150     unsigned DemandedResources;
   1151 
   1152     SchedResourceDelta(): CritResources(0), DemandedResources(0) {}
   1153 
   1154     bool operator==(const SchedResourceDelta &RHS) const {
   1155       return CritResources == RHS.CritResources
   1156         && DemandedResources == RHS.DemandedResources;
   1157     }
   1158     bool operator!=(const SchedResourceDelta &RHS) const {
   1159       return !operator==(RHS);
   1160     }
   1161   };
   1162 
   1163   /// Store the state used by ConvergingScheduler heuristics, required for the
   1164   /// lifetime of one invocation of pickNode().
   1165   struct SchedCandidate {
   1166     CandPolicy Policy;
   1167 
   1168     // The best SUnit candidate.
   1169     SUnit *SU;
   1170 
   1171     // The reason for this candidate.
   1172     CandReason Reason;
   1173 
   1174     // Set of reasons that apply to multiple candidates.
   1175     uint32_t RepeatReasonSet;
   1176 
   1177     // Register pressure values for the best candidate.
   1178     RegPressureDelta RPDelta;
   1179 
   1180     // Critical resource consumption of the best candidate.
   1181     SchedResourceDelta ResDelta;
   1182 
   1183     SchedCandidate(const CandPolicy &policy)
   1184       : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {}
   1185 
   1186     bool isValid() const { return SU; }
   1187 
   1188     // Copy the status of another candidate without changing policy.
   1189     void setBest(SchedCandidate &Best) {
   1190       assert(Best.Reason != NoCand && "uninitialized Sched candidate");
   1191       SU = Best.SU;
   1192       Reason = Best.Reason;
   1193       RPDelta = Best.RPDelta;
   1194       ResDelta = Best.ResDelta;
   1195     }
   1196 
   1197     bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); }
   1198     void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); }
   1199 
   1200     void initResourceDelta(const ScheduleDAGMI *DAG,
   1201                            const TargetSchedModel *SchedModel);
   1202   };
   1203 
   1204   /// Summarize the unscheduled region.
   1205   struct SchedRemainder {
   1206     // Critical path through the DAG in expected latency.
   1207     unsigned CriticalPath;
   1208 
   1209     // Scaled count of micro-ops left to schedule.
   1210     unsigned RemIssueCount;
   1211 
   1212     // Unscheduled resources
   1213     SmallVector<unsigned, 16> RemainingCounts;
   1214 
   1215     void reset() {
   1216       CriticalPath = 0;
   1217       RemIssueCount = 0;
   1218       RemainingCounts.clear();
   1219     }
   1220 
   1221     SchedRemainder() { reset(); }
   1222 
   1223     void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel);
   1224   };
   1225 
   1226   /// Each Scheduling boundary is associated with ready queues. It tracks the
   1227   /// current cycle in the direction of movement, and maintains the state
   1228   /// of "hazards" and other interlocks at the current cycle.
   1229   struct SchedBoundary {
   1230     ScheduleDAGMI *DAG;
   1231     const TargetSchedModel *SchedModel;
   1232     SchedRemainder *Rem;
   1233 
   1234     ReadyQueue Available;
   1235     ReadyQueue Pending;
   1236     bool CheckPending;
   1237 
   1238     // For heuristics, keep a list of the nodes that immediately depend on the
   1239     // most recently scheduled node.
   1240     SmallPtrSet<const SUnit*, 8> NextSUs;
   1241 
   1242     ScheduleHazardRecognizer *HazardRec;
   1243 
   1244     /// Number of cycles it takes to issue the instructions scheduled in this
   1245     /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
   1246     /// See getStalls().
   1247     unsigned CurrCycle;
   1248 
   1249     /// Micro-ops issued in the current cycle
   1250     unsigned CurrMOps;
   1251 
   1252     /// MinReadyCycle - Cycle of the soonest available instruction.
   1253     unsigned MinReadyCycle;
   1254 
   1255     // The expected latency of the critical path in this scheduled zone.
   1256     unsigned ExpectedLatency;
   1257 
   1258     // The latency of dependence chains leading into this zone.
   1259     // For each node scheduled bottom-up: DLat = max DLat, N.Depth.
   1260     // For each cycle scheduled: DLat -= 1.
   1261     unsigned DependentLatency;
   1262 
   1263     /// Count the scheduled (issued) micro-ops that can be retired by
   1264     /// time=CurrCycle assuming the first scheduled instr is retired at time=0.
   1265     unsigned RetiredMOps;
   1266 
   1267     // Count scheduled resources that have been executed. Resources are
   1268     // considered executed if they become ready in the time that it takes to
   1269     // saturate any resource including the one in question. Counts are scaled
   1270     // for direct comparison with other resources. Counts can be compared with
   1271     // MOps * getMicroOpFactor and Latency * getLatencyFactor.
   1272     SmallVector<unsigned, 16> ExecutedResCounts;
   1273 
   1274     /// Cache the max count for a single resource.
   1275     unsigned MaxExecutedResCount;
   1276 
   1277     // Cache the critical resources ID in this scheduled zone.
   1278     unsigned ZoneCritResIdx;
   1279 
    1280     // Whether the scheduled region is resource limited vs. latency limited.
   1281     bool IsResourceLimited;
   1282 
   1283 #ifndef NDEBUG
   1284     // Remember the greatest operand latency as an upper bound on the number of
   1285     // times we should retry the pending queue because of a hazard.
   1286     unsigned MaxObservedLatency;
   1287 #endif
   1288 
   1289     void reset() {
   1290       // A new HazardRec is created for each DAG and owned by SchedBoundary.
   1291       delete HazardRec;
   1292 
   1293       Available.clear();
   1294       Pending.clear();
   1295       CheckPending = false;
   1296       NextSUs.clear();
   1297       HazardRec = 0;
   1298       CurrCycle = 0;
   1299       CurrMOps = 0;
   1300       MinReadyCycle = UINT_MAX;
   1301       ExpectedLatency = 0;
   1302       DependentLatency = 0;
   1303       RetiredMOps = 0;
   1304       MaxExecutedResCount = 0;
   1305       ZoneCritResIdx = 0;
   1306       IsResourceLimited = false;
   1307 #ifndef NDEBUG
   1308       MaxObservedLatency = 0;
   1309 #endif
   1310       // Reserve a zero-count for invalid CritResIdx.
   1311       ExecutedResCounts.resize(1);
   1312       assert(!ExecutedResCounts[0] && "nonzero count for bad resource");
   1313     }
   1314 
   1315     /// Pending queues extend the ready queues with the same ID and the
   1316     /// PendingFlag set.
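             ///
             /// For example, with the queue IDs defined below (TopQID = 1, BotQID = 2,
             /// LogMaxQID = 2), the top boundary's queues get IDs 1 and 1 << 2 = 4,
             /// and the bottom boundary's queues get IDs 2 and 2 << 2 = 8.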
   1317     SchedBoundary(unsigned ID, const Twine &Name):
   1318       DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"),
   1319       Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
   1320       HazardRec(0) {
   1321       reset();
   1322     }
   1323 
   1324     ~SchedBoundary() { delete HazardRec; }
   1325 
   1326     void init(ScheduleDAGMI *dag, const TargetSchedModel *smodel,
   1327               SchedRemainder *rem);
   1328 
   1329     bool isTop() const {
   1330       return Available.getID() == ConvergingScheduler::TopQID;
   1331     }
   1332 
   1333 #ifndef NDEBUG
   1334     const char *getResourceName(unsigned PIdx) {
   1335       if (!PIdx)
   1336         return "MOps";
   1337       return SchedModel->getProcResource(PIdx)->Name;
   1338     }
   1339 #endif
   1340 
   1341     /// Get the number of latency cycles "covered" by the scheduled
   1342     /// instructions. This is the larger of the critical path within the zone
   1343     /// and the number of cycles required to issue the instructions.
   1344     unsigned getScheduledLatency() const {
   1345       return std::max(ExpectedLatency, CurrCycle);
   1346     }
   1347 
   1348     unsigned getUnscheduledLatency(SUnit *SU) const {
   1349       return isTop() ? SU->getHeight() : SU->getDepth();
   1350     }
   1351 
   1352     unsigned getResourceCount(unsigned ResIdx) const {
   1353       return ExecutedResCounts[ResIdx];
   1354     }
   1355 
   1356     /// Get the scaled count of scheduled micro-ops and resources, including
   1357     /// executed resources.
   1358     unsigned getCriticalCount() const {
   1359       if (!ZoneCritResIdx)
   1360         return RetiredMOps * SchedModel->getMicroOpFactor();
   1361       return getResourceCount(ZoneCritResIdx);
   1362     }
   1363 
   1364     /// Get a scaled count for the minimum execution time of the scheduled
   1365     /// micro-ops that are ready to execute by getExecutedCount. Notice the
   1366     /// feedback loop.
   1367     unsigned getExecutedCount() const {
   1368       return std::max(CurrCycle * SchedModel->getLatencyFactor(),
   1369                       MaxExecutedResCount);
   1370     }
   1371 
   1372     bool checkHazard(SUnit *SU);
   1373 
   1374     unsigned findMaxLatency(ArrayRef<SUnit*> ReadySUs);
   1375 
   1376     unsigned getOtherResourceCount(unsigned &OtherCritIdx);
   1377 
   1378     void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone);
   1379 
   1380     void releaseNode(SUnit *SU, unsigned ReadyCycle);
   1381 
   1382     void bumpCycle(unsigned NextCycle);
   1383 
   1384     void incExecutedResources(unsigned PIdx, unsigned Count);
   1385 
   1386     unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle);
   1387 
   1388     void bumpNode(SUnit *SU);
   1389 
   1390     void releasePending();
   1391 
   1392     void removeReady(SUnit *SU);
   1393 
   1394     SUnit *pickOnlyChoice();
   1395 
   1396 #ifndef NDEBUG
   1397     void dumpScheduledState();
   1398 #endif
   1399   };
   1400 
   1401 private:
   1402   ScheduleDAGMI *DAG;
   1403   const TargetSchedModel *SchedModel;
   1404   const TargetRegisterInfo *TRI;
   1405 
   1406   // State of the top and bottom scheduled instruction boundaries.
   1407   SchedRemainder Rem;
   1408   SchedBoundary Top;
   1409   SchedBoundary Bot;
   1410 
   1411 public:
   1412   /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
   1413   enum {
   1414     TopQID = 1,
   1415     BotQID = 2,
   1416     LogMaxQID = 2
   1417   };
   1418 
   1419   ConvergingScheduler():
   1420     DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
   1421 
   1422   virtual void initialize(ScheduleDAGMI *dag);
   1423 
   1424   virtual SUnit *pickNode(bool &IsTopNode);
   1425 
   1426   virtual void schedNode(SUnit *SU, bool IsTopNode);
   1427 
   1428   virtual void releaseTopNode(SUnit *SU);
   1429 
   1430   virtual void releaseBottomNode(SUnit *SU);
   1431 
   1432   virtual void registerRoots();
   1433 
   1434 protected:
   1435   void tryCandidate(SchedCandidate &Cand,
   1436                     SchedCandidate &TryCand,
   1437                     SchedBoundary &Zone,
   1438                     const RegPressureTracker &RPTracker,
   1439                     RegPressureTracker &TempTracker);
   1440 
   1441   SUnit *pickNodeBidirectional(bool &IsTopNode);
   1442 
   1443   void pickNodeFromQueue(SchedBoundary &Zone,
   1444                          const RegPressureTracker &RPTracker,
   1445                          SchedCandidate &Candidate);
   1446 
   1447   void reschedulePhysRegCopies(SUnit *SU, bool isTop);
   1448 
   1449 #ifndef NDEBUG
   1450   void traceCandidate(const SchedCandidate &Cand);
   1451 #endif
   1452 };
   1453 } // namespace
   1454 
   1455 void ConvergingScheduler::SchedRemainder::
   1456 init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) {
   1457   reset();
   1458   if (!SchedModel->hasInstrSchedModel())
   1459     return;
   1460   RemainingCounts.resize(SchedModel->getNumProcResourceKinds());
   1461   for (std::vector<SUnit>::iterator
   1462          I = DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) {
   1463     const MCSchedClassDesc *SC = DAG->getSchedClass(&*I);
   1464     RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC)
   1465       * SchedModel->getMicroOpFactor();
   1466     for (TargetSchedModel::ProcResIter
   1467            PI = SchedModel->getWriteProcResBegin(SC),
   1468            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
   1469       unsigned PIdx = PI->ProcResourceIdx;
   1470       unsigned Factor = SchedModel->getResourceFactor(PIdx);
   1471       RemainingCounts[PIdx] += (Factor * PI->Cycles);
   1472     }
   1473   }
   1474 }
   1475 
   1476 void ConvergingScheduler::SchedBoundary::
   1477 init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
   1478   reset();
   1479   DAG = dag;
   1480   SchedModel = smodel;
   1481   Rem = rem;
   1482   if (SchedModel->hasInstrSchedModel())
   1483     ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
   1484 }
   1485 
   1486 void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
   1487   DAG = dag;
   1488   SchedModel = DAG->getSchedModel();
   1489   TRI = DAG->TRI;
   1490 
   1491   Rem.init(DAG, SchedModel);
   1492   Top.init(DAG, SchedModel, &Rem);
   1493   Bot.init(DAG, SchedModel, &Rem);
   1494 
   1495   // Initialize resource counts.
   1496 
   1497   // Initialize the HazardRecognizers. If itineraries don't exist, are empty, or
   1498   // are disabled, then these HazardRecs will be disabled.
   1499   const InstrItineraryData *Itin = SchedModel->getInstrItineraries();
   1500   const TargetMachine &TM = DAG->MF.getTarget();
   1501   Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
   1502   Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
   1503 
   1504   assert((!ForceTopDown || !ForceBottomUp) &&
   1505          "-misched-topdown incompatible with -misched-bottomup");
   1506 }
   1507 
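         /// Compute the cycle at which SU may be scheduled from the top and release
         /// it to the top boundary. For example (illustrative numbers): if a
         /// non-weak predecessor became ready at cycle 3 and its edge latency is 2,
         /// SU cannot be ready before cycle 5; TopReadyCycle is the max of such
         /// sums over all predecessors.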
   1508 void ConvergingScheduler::releaseTopNode(SUnit *SU) {
   1509   if (SU->isScheduled)
   1510     return;
   1511 
   1512   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
   1513        I != E; ++I) {
   1514     if (I->isWeak())
   1515       continue;
   1516     unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
   1517     unsigned Latency = I->getLatency();
   1518 #ifndef NDEBUG
   1519     Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency);
   1520 #endif
   1521     if (SU->TopReadyCycle < PredReadyCycle + Latency)
   1522       SU->TopReadyCycle = PredReadyCycle + Latency;
   1523   }
   1524   Top.releaseNode(SU, SU->TopReadyCycle);
   1525 }
   1526 
   1527 void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
   1528   if (SU->isScheduled)
   1529     return;
   1530 
   1531   assert(SU->getInstr() && "Scheduled SUnit must have instr");
   1532 
   1533   for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
   1534        I != E; ++I) {
   1535     if (I->isWeak())
   1536       continue;
   1537     unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
   1538     unsigned Latency = I->getLatency();
   1539 #ifndef NDEBUG
   1540     Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency);
   1541 #endif
   1542     if (SU->BotReadyCycle < SuccReadyCycle + Latency)
   1543       SU->BotReadyCycle = SuccReadyCycle + Latency;
   1544   }
   1545   Bot.releaseNode(SU, SU->BotReadyCycle);
   1546 }
   1547 
   1548 void ConvergingScheduler::registerRoots() {
   1549   Rem.CriticalPath = DAG->ExitSU.getDepth();
   1550   // Some roots may not feed into ExitSU. Check all of them in case.
   1551   for (std::vector<SUnit*>::const_iterator
   1552          I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) {
   1553     if ((*I)->getDepth() > Rem.CriticalPath)
   1554       Rem.CriticalPath = (*I)->getDepth();
   1555   }
   1556   DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n');
   1557 }
   1558 
   1559 /// Does this SU have a hazard within the current instruction group.
   1560 ///
   1561 /// The scheduler supports two modes of hazard recognition. The first is the
   1562 /// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
   1563 /// supports highly complicated in-order reservation tables
    1564 /// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
   1565 ///
   1566 /// The second is a streamlined mechanism that checks for hazards based on
   1567 /// simple counters that the scheduler itself maintains. It explicitly checks
   1568 /// for instruction dispatch limitations, including the number of micro-ops that
   1569 /// can dispatch per cycle.
   1570 ///
   1571 /// TODO: Also check whether the SU must start a new group.
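         ///
         /// For example (illustrative values only): with an issue width of 2, if
         /// one micro-op has already issued in the current cycle (CurrMOps == 1),
         /// a candidate that decodes to 2 micro-ops would overflow the group
         /// (1 + 2 > 2), so it is reported as a hazard and stays in the Pending
         /// queue until the cycle is bumped.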
   1572 bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) {
   1573   if (HazardRec->isEnabled())
   1574     return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
   1575 
   1576   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
   1577   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
   1578     DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") uops="
   1579           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
   1580     return true;
   1581   }
   1582   return false;
   1583 }
   1584 
   1585 // Find the unscheduled node in ReadySUs with the highest latency.
   1586 unsigned ConvergingScheduler::SchedBoundary::
   1587 findMaxLatency(ArrayRef<SUnit*> ReadySUs) {
   1588   SUnit *LateSU = 0;
   1589   unsigned RemLatency = 0;
   1590   for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end();
   1591        I != E; ++I) {
   1592     unsigned L = getUnscheduledLatency(*I);
   1593     if (L > RemLatency) {
   1594       RemLatency = L;
   1595       LateSU = *I;
   1596     }
   1597   }
   1598   if (LateSU) {
   1599     DEBUG(dbgs() << Available.getName() << " RemLatency SU("
   1600           << LateSU->NodeNum << ") " << RemLatency << "c\n");
   1601   }
   1602   return RemLatency;
   1603 }
   1604 
   1605 // Count resources in this zone and the remaining unscheduled
    1606 // instructions. Return the max count, scaled. Set OtherCritIdx to the critical
   1607 // resource index, or zero if the zone is issue limited.
   1608 unsigned ConvergingScheduler::SchedBoundary::
   1609 getOtherResourceCount(unsigned &OtherCritIdx) {
   1610   OtherCritIdx = 0;
   1611   if (!SchedModel->hasInstrSchedModel())
   1612     return 0;
   1613 
   1614   unsigned OtherCritCount = Rem->RemIssueCount
   1615     + (RetiredMOps * SchedModel->getMicroOpFactor());
   1616   DEBUG(dbgs() << "  " << Available.getName() << " + Remain MOps: "
   1617         << OtherCritCount / SchedModel->getMicroOpFactor() << '\n');
   1618   for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds();
   1619        PIdx != PEnd; ++PIdx) {
   1620     unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx];
   1621     if (OtherCount > OtherCritCount) {
   1622       OtherCritCount = OtherCount;
   1623       OtherCritIdx = PIdx;
   1624     }
   1625   }
   1626   if (OtherCritIdx) {
   1627     DEBUG(dbgs() << "  " << Available.getName() << " + Remain CritRes: "
   1628           << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx)
   1629           << " " << getResourceName(OtherCritIdx) << "\n");
   1630   }
   1631   return OtherCritCount;
   1632 }
   1633 
   1634 /// Set the CandPolicy for this zone given the current resources and latencies
   1635 /// inside and outside the zone.
   1636 void ConvergingScheduler::SchedBoundary::setPolicy(CandPolicy &Policy,
   1637                                                    SchedBoundary &OtherZone) {
   1638   // Now that potential stalls have been considered, apply preemptive heuristics
    1639   // based on the total latency and resources inside and outside this
   1640   // zone.
   1641 
   1642   // Compute remaining latency. We need this both to determine whether the
   1643   // overall schedule has become latency-limited and whether the instructions
   1644   // outside this zone are resource or latency limited.
   1645   //
   1646   // The "dependent" latency is updated incrementally during scheduling as the
   1647   // max height/depth of scheduled nodes minus the cycles since it was
   1648   // scheduled:
    1649   //   DLat = max(N.depth - (CurrCycle - N.ReadyCycle)) for N in Zone
   1650   //
   1651   // The "independent" latency is the max ready queue depth:
   1652   //   ILat = max N.depth for N in Available|Pending
   1653   //
   1654   // RemainingLatency is the greater of independent and dependent latency.
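           //
           // For example (illustrative numbers): if the deepest node scheduled so
           // far had depth 10 and 4 cycles have passed since it was scheduled, the
           // dependent latency contributes 10 - 4 = 6; if the deepest node still in
           // Available or Pending has depth 8, the remaining latency is
           // max(6, 8) = 8.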
   1655   unsigned RemLatency = DependentLatency;
   1656   RemLatency = std::max(RemLatency, findMaxLatency(Available.elements()));
   1657   RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements()));
   1658 
   1659   // Compute the critical resource outside the zone.
   1660   unsigned OtherCritIdx;
   1661   unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx);
   1662 
   1663   bool OtherResLimited = false;
   1664   if (SchedModel->hasInstrSchedModel()) {
   1665     unsigned LFactor = SchedModel->getLatencyFactor();
   1666     OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor;
   1667   }
   1668   if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) {
   1669     Policy.ReduceLatency |= true;
   1670     DEBUG(dbgs() << "  " << Available.getName() << " RemainingLatency "
   1671           << RemLatency << " + " << CurrCycle << "c > CritPath "
   1672           << Rem->CriticalPath << "\n");
   1673   }
   1674   // If the same resource is limiting inside and outside the zone, do nothing.
   1675   if (ZoneCritResIdx == OtherCritIdx)
   1676     return;
   1677 
   1678   DEBUG(
   1679     if (IsResourceLimited) {
   1680       dbgs() << "  " << Available.getName() << " ResourceLimited: "
   1681              << getResourceName(ZoneCritResIdx) << "\n";
   1682     }
   1683     if (OtherResLimited)
   1684       dbgs() << "  RemainingLimit: " << getResourceName(OtherCritIdx) << "\n";
   1685     if (!IsResourceLimited && !OtherResLimited)
   1686       dbgs() << "  Latency limited both directions.\n");
   1687 
   1688   if (IsResourceLimited && !Policy.ReduceResIdx)
   1689     Policy.ReduceResIdx = ZoneCritResIdx;
   1690 
   1691   if (OtherResLimited)
   1692     Policy.DemandResIdx = OtherCritIdx;
   1693 }
   1694 
   1695 void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
   1696                                                      unsigned ReadyCycle) {
   1697   if (ReadyCycle < MinReadyCycle)
   1698     MinReadyCycle = ReadyCycle;
   1699 
   1700   // Check for interlocks first. For the purpose of other heuristics, an
   1701   // instruction that cannot issue appears as if it's not in the ReadyQueue.
   1702   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
   1703   if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU))
   1704     Pending.push(SU);
   1705   else
   1706     Available.push(SU);
   1707 
   1708   // Record this node as an immediate dependent of the scheduled node.
   1709   NextSUs.insert(SU);
   1710 }
   1711 
   1712 /// Move the boundary of scheduled code by one cycle.
   1713 void ConvergingScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) {
   1714   if (SchedModel->getMicroOpBufferSize() == 0) {
   1715     assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
   1716     if (MinReadyCycle > NextCycle)
   1717       NextCycle = MinReadyCycle;
   1718   }
   1719   // Update the current micro-ops, which will issue in the next cycle.
   1720   unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle);
   1721   CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps;
   1722 
   1723   // Decrement DependentLatency based on the next cycle.
   1724   if ((NextCycle - CurrCycle) > DependentLatency)
   1725     DependentLatency = 0;
   1726   else
   1727     DependentLatency -= (NextCycle - CurrCycle);
   1728 
   1729   if (!HazardRec->isEnabled()) {
   1730     // Bypass HazardRec virtual calls.
   1731     CurrCycle = NextCycle;
   1732   }
   1733   else {
   1734     // Bypass getHazardType calls in case of long latency.
   1735     for (; CurrCycle != NextCycle; ++CurrCycle) {
   1736       if (isTop())
   1737         HazardRec->AdvanceCycle();
   1738       else
   1739         HazardRec->RecedeCycle();
   1740     }
   1741   }
   1742   CheckPending = true;
   1743   unsigned LFactor = SchedModel->getLatencyFactor();
   1744   IsResourceLimited =
   1745     (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
   1746     > (int)LFactor;
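           // In other words, the zone counts as resource limited when the critical
           // resource's scaled count exceeds the latency-equivalent count by more
           // than one full cycle. Illustrative numbers: with LFactor = 4, a zone
           // that has executed 20 scaled units of its critical resource while
           // covering only 3 latency cycles (3 * 4 = 12) is resource limited, since
           // 20 - 12 = 8 > 4.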
   1747 
   1748   DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n');
   1749 }
   1750 
   1751 void ConvergingScheduler::SchedBoundary::incExecutedResources(unsigned PIdx,
   1752                                                               unsigned Count) {
   1753   ExecutedResCounts[PIdx] += Count;
   1754   if (ExecutedResCounts[PIdx] > MaxExecutedResCount)
   1755     MaxExecutedResCount = ExecutedResCounts[PIdx];
   1756 }
   1757 
   1758 /// Add the given processor resource to this scheduled zone.
   1759 ///
   1760 /// \param Cycles indicates the number of consecutive (non-pipelined) cycles
   1761 /// during which this resource is consumed.
   1762 ///
   1763 /// \return the next cycle at which the instruction may execute without
   1764 /// oversubscribing resources.
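         ///
         /// For example (illustrative values): if the resource's factor is 2 and
         /// the instruction occupies it for 3 cycles, 3 * 2 = 6 scaled units are
         /// added to ExecutedResCounts[PIdx] and subtracted from
         /// Rem->RemainingCounts[PIdx].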
   1765 unsigned ConvergingScheduler::SchedBoundary::
   1766 countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) {
   1767   unsigned Factor = SchedModel->getResourceFactor(PIdx);
   1768   unsigned Count = Factor * Cycles;
   1769   DEBUG(dbgs() << "  " << getResourceName(PIdx)
   1770         << " +" << Cycles << "x" << Factor << "u\n");
   1771 
   1772   // Update Executed resources counts.
   1773   incExecutedResources(PIdx, Count);
   1774   assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted");
   1775   Rem->RemainingCounts[PIdx] -= Count;
   1776 
   1777   // Check if this resource exceeds the current critical resource. If so, it
   1778   // becomes the critical resource.
   1779   if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) {
   1780     ZoneCritResIdx = PIdx;
   1781     DEBUG(dbgs() << "  *** Critical resource "
   1782           << getResourceName(PIdx) << ": "
   1783           << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n");
   1784   }
   1785   // TODO: We don't yet model reserved resources. It's not hard though.
   1786   return CurrCycle;
   1787 }
   1788 
   1789 /// Move the boundary of scheduled code by one SUnit.
   1790 void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) {
   1791   // Update the reservation table.
   1792   if (HazardRec->isEnabled()) {
   1793     if (!isTop() && SU->isCall) {
   1794       // Calls are scheduled with their preceding instructions. For bottom-up
   1795       // scheduling, clear the pipeline state before emitting.
   1796       HazardRec->Reset();
   1797     }
   1798     HazardRec->EmitInstruction(SU);
   1799   }
   1800   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   1801   unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr());
   1802   CurrMOps += IncMOps;
    1803   // checkHazard prevents scheduling multiple instructions per cycle that would
    1804   // exceed the issue width. However, we commonly reach the maximum. In this case
   1805   // opportunistically bump the cycle to avoid uselessly checking everything in
   1806   // the readyQ. Furthermore, a single instruction may produce more than one
   1807   // cycle's worth of micro-ops.
   1808   //
   1809   // TODO: Also check if this SU must end a dispatch group.
   1810   unsigned NextCycle = CurrCycle;
   1811   if (CurrMOps >= SchedModel->getIssueWidth()) {
   1812     ++NextCycle;
   1813     DEBUG(dbgs() << "  *** Max MOps " << CurrMOps
   1814           << " at cycle " << CurrCycle << '\n');
   1815   }
   1816   unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle);
   1817   DEBUG(dbgs() << "  Ready @" << ReadyCycle << "c\n");
   1818 
   1819   switch (SchedModel->getMicroOpBufferSize()) {
   1820   case 0:
   1821     assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
   1822     break;
   1823   case 1:
   1824     if (ReadyCycle > NextCycle) {
   1825       NextCycle = ReadyCycle;
   1826       DEBUG(dbgs() << "  *** Stall until: " << ReadyCycle << "\n");
   1827     }
   1828     break;
   1829   default:
   1830     // We don't currently model the OOO reorder buffer, so consider all
   1831     // scheduled MOps to be "retired".
   1832     break;
   1833   }
   1834   RetiredMOps += IncMOps;
   1835 
   1836   // Update resource counts and critical resource.
   1837   if (SchedModel->hasInstrSchedModel()) {
   1838     unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor();
   1839     assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted");
   1840     Rem->RemIssueCount -= DecRemIssue;
   1841     if (ZoneCritResIdx) {
   1842       // Scale scheduled micro-ops for comparing with the critical resource.
   1843       unsigned ScaledMOps =
   1844         RetiredMOps * SchedModel->getMicroOpFactor();
   1845 
   1846       // If scaled micro-ops are now more than the previous critical resource by
   1847       // a full cycle, then micro-ops issue becomes critical.
   1848       if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx))
   1849           >= (int)SchedModel->getLatencyFactor()) {
   1850         ZoneCritResIdx = 0;
   1851         DEBUG(dbgs() << "  *** Critical resource NumMicroOps: "
   1852               << ScaledMOps / SchedModel->getLatencyFactor() << "c\n");
   1853       }
   1854     }
   1855     for (TargetSchedModel::ProcResIter
   1856            PI = SchedModel->getWriteProcResBegin(SC),
   1857            PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
   1858       unsigned RCycle =
   1859         countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle);
   1860       if (RCycle > NextCycle)
   1861         NextCycle = RCycle;
   1862     }
   1863   }
   1864   // Update ExpectedLatency and DependentLatency.
   1865   unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency;
   1866   unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency;
   1867   if (SU->getDepth() > TopLatency) {
   1868     TopLatency = SU->getDepth();
   1869     DEBUG(dbgs() << "  " << Available.getName()
   1870           << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n");
   1871   }
   1872   if (SU->getHeight() > BotLatency) {
   1873     BotLatency = SU->getHeight();
   1874     DEBUG(dbgs() << "  " << Available.getName()
   1875           << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n");
   1876   }
   1877   // If we stall for any reason, bump the cycle.
   1878   if (NextCycle > CurrCycle) {
   1879     bumpCycle(NextCycle);
   1880   }
   1881   else {
   1882     // After updating ZoneCritResIdx and ExpectedLatency, check if we're
    1883     // resource limited. If a stall occurred, bumpCycle does this.
   1884     unsigned LFactor = SchedModel->getLatencyFactor();
   1885     IsResourceLimited =
   1886       (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
   1887       > (int)LFactor;
   1888   }
   1889   DEBUG(dumpScheduledState());
   1890 }
   1891 
    1892 /// Release pending ready nodes into the available queue. This makes them
   1893 /// visible to heuristics.
   1894 void ConvergingScheduler::SchedBoundary::releasePending() {
   1895   // If the available queue is empty, it is safe to reset MinReadyCycle.
   1896   if (Available.empty())
   1897     MinReadyCycle = UINT_MAX;
   1898 
   1899   // Check to see if any of the pending instructions are ready to issue.  If
   1900   // so, add them to the available queue.
   1901   bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
   1902   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
   1903     SUnit *SU = *(Pending.begin()+i);
   1904     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
   1905 
   1906     if (ReadyCycle < MinReadyCycle)
   1907       MinReadyCycle = ReadyCycle;
   1908 
   1909     if (!IsBuffered && ReadyCycle > CurrCycle)
   1910       continue;
   1911 
   1912     if (checkHazard(SU))
   1913       continue;
   1914 
   1915     Available.push(SU);
   1916     Pending.remove(Pending.begin()+i);
   1917     --i; --e;
   1918   }
   1919   DEBUG(if (!Pending.empty()) Pending.dump());
   1920   CheckPending = false;
   1921 }
   1922 
   1923 /// Remove SU from the ready set for this boundary.
   1924 void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
   1925   if (Available.isInQueue(SU))
   1926     Available.remove(Available.find(SU));
   1927   else {
   1928     assert(Pending.isInQueue(SU) && "bad ready count");
   1929     Pending.remove(Pending.find(SU));
   1930   }
   1931 }
   1932 
   1933 /// If this queue only has one ready candidate, return it. As a side effect,
   1934 /// defer any nodes that now hit a hazard, and advance the cycle until at least
   1935 /// one node is ready. If multiple instructions are ready, return NULL.
   1936 SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
   1937   if (CheckPending)
   1938     releasePending();
   1939 
   1940   if (CurrMOps > 0) {
   1941     // Defer any ready instrs that now have a hazard.
   1942     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
   1943       if (checkHazard(*I)) {
   1944         Pending.push(*I);
   1945         I = Available.remove(I);
   1946         continue;
   1947       }
   1948       ++I;
   1949     }
   1950   }
   1951   for (unsigned i = 0; Available.empty(); ++i) {
   1952     assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
   1953            "permanent hazard"); (void)i;
   1954     bumpCycle(CurrCycle + 1);
   1955     releasePending();
   1956   }
   1957   if (Available.size() == 1)
   1958     return *Available.begin();
   1959   return NULL;
   1960 }
   1961 
   1962 #ifndef NDEBUG
   1963 // This is useful information to dump after bumpNode.
   1964 // Note that the Queue contents are more useful before pickNodeFromQueue.
   1965 void ConvergingScheduler::SchedBoundary::dumpScheduledState() {
   1966   unsigned ResFactor;
   1967   unsigned ResCount;
   1968   if (ZoneCritResIdx) {
   1969     ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx);
   1970     ResCount = getResourceCount(ZoneCritResIdx);
   1971   }
   1972   else {
   1973     ResFactor = SchedModel->getMicroOpFactor();
   1974     ResCount = RetiredMOps * SchedModel->getMicroOpFactor();
   1975   }
   1976   unsigned LFactor = SchedModel->getLatencyFactor();
   1977   dbgs() << Available.getName() << " @" << CurrCycle << "c\n"
   1978          << "  Retired: " << RetiredMOps;
   1979   dbgs() << "\n  Executed: " << getExecutedCount() / LFactor << "c";
   1980   dbgs() << "\n  Critical: " << ResCount / LFactor << "c, "
   1981          << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx)
   1982          << "\n  ExpectedLatency: " << ExpectedLatency << "c\n"
   1983          << (IsResourceLimited ? "  - Resource" : "  - Latency")
   1984          << " limited.\n";
   1985 }
   1986 #endif
   1987 
   1988 void ConvergingScheduler::SchedCandidate::
   1989 initResourceDelta(const ScheduleDAGMI *DAG,
   1990                   const TargetSchedModel *SchedModel) {
   1991   if (!Policy.ReduceResIdx && !Policy.DemandResIdx)
   1992     return;
   1993 
   1994   const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
   1995   for (TargetSchedModel::ProcResIter
   1996          PI = SchedModel->getWriteProcResBegin(SC),
   1997          PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
   1998     if (PI->ProcResourceIdx == Policy.ReduceResIdx)
   1999       ResDelta.CritResources += PI->Cycles;
   2000     if (PI->ProcResourceIdx == Policy.DemandResIdx)
   2001       ResDelta.DemandedResources += PI->Cycles;
   2002   }
   2003 }
   2004 
   2005 
   2006 /// Return true if this heuristic determines order.
   2007 static bool tryLess(int TryVal, int CandVal,
   2008                     ConvergingScheduler::SchedCandidate &TryCand,
   2009                     ConvergingScheduler::SchedCandidate &Cand,
   2010                     ConvergingScheduler::CandReason Reason) {
   2011   if (TryVal < CandVal) {
   2012     TryCand.Reason = Reason;
   2013     return true;
   2014   }
   2015   if (TryVal > CandVal) {
   2016     if (Cand.Reason > Reason)
   2017       Cand.Reason = Reason;
   2018     return true;
   2019   }
   2020   Cand.setRepeat(Reason);
   2021   return false;
   2022 }
   2023 
   2024 static bool tryGreater(int TryVal, int CandVal,
   2025                        ConvergingScheduler::SchedCandidate &TryCand,
   2026                        ConvergingScheduler::SchedCandidate &Cand,
   2027                        ConvergingScheduler::CandReason Reason) {
   2028   if (TryVal > CandVal) {
   2029     TryCand.Reason = Reason;
   2030     return true;
   2031   }
   2032   if (TryVal < CandVal) {
   2033     if (Cand.Reason > Reason)
   2034       Cand.Reason = Reason;
   2035     return true;
   2036   }
   2037   Cand.setRepeat(Reason);
   2038   return false;
   2039 }
   2040 
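         // tryLess and tryGreater above share a three-way contract (summarized here
         // for clarity): if TryVal wins, TryCand is tagged with Reason and takes
         // over; if CandVal wins, the incumbent stays but its Reason is upgraded to
         // this one when it is more important (lower enum value); on a tie they
         // return false so the next, lower-priority heuristic can break it.
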
   2041 static bool tryPressure(const PressureElement &TryP,
   2042                         const PressureElement &CandP,
   2043                         ConvergingScheduler::SchedCandidate &TryCand,
   2044                         ConvergingScheduler::SchedCandidate &Cand,
   2045                         ConvergingScheduler::CandReason Reason) {
   2046   // If both candidates affect the same set, go with the smallest increase.
   2047   if (TryP.PSetID == CandP.PSetID) {
   2048     return tryLess(TryP.UnitIncrease, CandP.UnitIncrease, TryCand, Cand,
   2049                    Reason);
   2050   }
   2051   // If one candidate decreases and the other increases, go with it.
   2052   if (tryLess(TryP.UnitIncrease < 0, CandP.UnitIncrease < 0, TryCand, Cand,
   2053               Reason)) {
   2054     return true;
   2055   }
   2056   // If TryP has lower Rank, it has a higher priority.
   2057   int TryRank = TryP.PSetRank();
   2058   int CandRank = CandP.PSetRank();
   2059   // If the candidates are decreasing pressure, reverse priority.
   2060   if (TryP.UnitIncrease < 0)
   2061     std::swap(TryRank, CandRank);
   2062   return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
   2063 }
   2064 
   2065 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
   2066   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
   2067 }
   2068 
   2069 /// Minimize physical register live ranges. Regalloc wants them adjacent to
   2070 /// their physreg def/use.
   2071 ///
   2072 /// FIXME: This is an unnecessary check on the critical path. Most are root/leaf
   2073 /// copies which can be prescheduled. The rest (e.g. x86 MUL) could be bundled
   2074 /// with the operation that produces or consumes the physreg. We'll do this when
   2075 /// regalloc has support for parallel copies.
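         ///
         /// Illustrative example: when scheduling top-down, a COPY whose source is
         /// a physical register has that register on the already-scheduled side
         /// (operand 1), so the copy is biased toward being scheduled immediately,
         /// keeping it adjacent to the physreg def or use.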
   2076 static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
   2077   const MachineInstr *MI = SU->getInstr();
   2078   if (!MI->isCopy())
   2079     return 0;
   2080 
   2081   unsigned ScheduledOper = isTop ? 1 : 0;
   2082   unsigned UnscheduledOper = isTop ? 0 : 1;
    2083   // If we have already scheduled the physreg producer/consumer, immediately
   2084   // schedule the copy.
   2085   if (TargetRegisterInfo::isPhysicalRegister(
   2086         MI->getOperand(ScheduledOper).getReg()))
   2087     return 1;
   2088   // If the physreg is at the boundary, defer it. Otherwise schedule it
   2089   // immediately to free the dependent. We can hoist the copy later.
   2090   bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
   2091   if (TargetRegisterInfo::isPhysicalRegister(
   2092         MI->getOperand(UnscheduledOper).getReg()))
   2093     return AtBoundary ? -1 : 1;
   2094   return 0;
   2095 }
   2096 
    2097 /// Apply a set of heuristics to a new candidate. Heuristics are currently
   2098 /// hierarchical. This may be more efficient than a graduated cost model because
   2099 /// we don't need to evaluate all aspects of the model for each node in the
   2100 /// queue. But it's really done to make the heuristics easier to debug and
   2101 /// statistically analyze.
   2102 ///
   2103 /// \param Cand provides the policy and current best candidate.
   2104 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
   2105 /// \param Zone describes the scheduled zone that we are extending.
   2106 /// \param RPTracker describes reg pressure within the scheduled zone.
   2107 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
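         ///
         /// The heuristics below are tried in strictly decreasing priority, matching
         /// the CandReason enum order: physreg copy bias, excess pressure, critical
         /// pressure, cluster edges, weak edges, max pressure, resource reduction,
         /// resource demand, latency (depth/height), next def/use, and finally
         /// original node order. The first heuristic that distinguishes the
         /// candidates decides.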
   2108 void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
   2109                                        SchedCandidate &TryCand,
   2110                                        SchedBoundary &Zone,
   2111                                        const RegPressureTracker &RPTracker,
   2112                                        RegPressureTracker &TempTracker) {
   2113 
   2114   // Always initialize TryCand's RPDelta.
   2115   TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
   2116                                   DAG->getRegionCriticalPSets(),
   2117                                   DAG->getRegPressure().MaxSetPressure);
   2118 
   2119   // Initialize the candidate if needed.
   2120   if (!Cand.isValid()) {
   2121     TryCand.Reason = NodeOrder;
   2122     return;
   2123   }
   2124 
   2125   if (tryGreater(biasPhysRegCopy(TryCand.SU, Zone.isTop()),
   2126                  biasPhysRegCopy(Cand.SU, Zone.isTop()),
   2127                  TryCand, Cand, PhysRegCopy))
   2128     return;
   2129 
   2130   // Avoid exceeding the target's limit. If signed PSetID is negative, it is
   2131   // invalid; convert it to INT_MAX to give it lowest priority.
   2132   if (tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand,
   2133                   RegExcess))
   2134     return;
   2135 
   2136   // Avoid increasing the max critical pressure in the scheduled region.
   2137   if (tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax,
   2138                   TryCand, Cand, RegCritical))
   2139     return;
   2140 
   2141   // Keep clustered nodes together to encourage downstream peephole
   2142   // optimizations which may reduce resource requirements.
   2143   //
   2144   // This is a best effort to set things up for a post-RA pass. Optimizations
   2145   // like generating loads of multiple registers should ideally be done within
   2146   // the scheduler pass by combining the loads during DAG postprocessing.
   2147   const SUnit *NextClusterSU =
   2148     Zone.isTop() ? DAG->getNextClusterSucc() : DAG->getNextClusterPred();
   2149   if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU,
   2150                  TryCand, Cand, Cluster))
   2151     return;
   2152 
   2153   // Weak edges are for clustering and other constraints.
   2154   if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()),
   2155               getWeakLeft(Cand.SU, Zone.isTop()),
   2156               TryCand, Cand, Weak)) {
   2157     return;
   2158   }
   2159   // Avoid increasing the max pressure of the entire region.
   2160   if (tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax,
   2161                   TryCand, Cand, RegMax))
   2162     return;
   2163 
   2164   // Avoid critical resource consumption and balance the schedule.
   2165   TryCand.initResourceDelta(DAG, SchedModel);
   2166   if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources,
   2167               TryCand, Cand, ResourceReduce))
   2168     return;
   2169   if (tryGreater(TryCand.ResDelta.DemandedResources,
   2170                  Cand.ResDelta.DemandedResources,
   2171                  TryCand, Cand, ResourceDemand))
   2172     return;
   2173 
   2174   // Avoid serializing long latency dependence chains.
   2175   if (Cand.Policy.ReduceLatency) {
   2176     if (Zone.isTop()) {
   2177       if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
   2178         if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
   2179                     TryCand, Cand, TopDepthReduce))
   2180           return;
   2181       }
   2182       if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
   2183                      TryCand, Cand, TopPathReduce))
   2184         return;
   2185     }
   2186     else {
   2187       if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
   2188         if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
   2189                     TryCand, Cand, BotHeightReduce))
   2190           return;
   2191       }
   2192       if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
   2193                      TryCand, Cand, BotPathReduce))
   2194         return;
   2195     }
   2196   }
   2197 
   2198   // Prefer immediate defs/users of the last scheduled instruction. This is a
   2199   // local pressure avoidance strategy that also makes the machine code
   2200   // readable.
   2201   if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU),
   2202                  TryCand, Cand, NextDefUse))
   2203     return;
   2204 
   2205   // Fall through to original instruction order.
   2206   if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
   2207       || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
   2208     TryCand.Reason = NodeOrder;
   2209   }
   2210 }
   2211 
   2212 #ifndef NDEBUG
   2213 const char *ConvergingScheduler::getReasonStr(
   2214   ConvergingScheduler::CandReason Reason) {
   2215   switch (Reason) {
   2216   case NoCand:         return "NOCAND    ";
   2217   case PhysRegCopy:    return "PREG-COPY";
   2218   case RegExcess:      return "REG-EXCESS";
   2219   case RegCritical:    return "REG-CRIT  ";
   2220   case Cluster:        return "CLUSTER   ";
   2221   case Weak:           return "WEAK      ";
   2222   case RegMax:         return "REG-MAX   ";
   2223   case ResourceReduce: return "RES-REDUCE";
   2224   case ResourceDemand: return "RES-DEMAND";
   2225   case TopDepthReduce: return "TOP-DEPTH ";
   2226   case TopPathReduce:  return "TOP-PATH  ";
   2227   case BotHeightReduce:return "BOT-HEIGHT";
   2228   case BotPathReduce:  return "BOT-PATH  ";
   2229   case NextDefUse:     return "DEF-USE   ";
   2230   case NodeOrder:      return "ORDER     ";
   2231   };
   2232   llvm_unreachable("Unknown reason!");
   2233 }
   2234 
   2235 void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) {
   2236   PressureElement P;
   2237   unsigned ResIdx = 0;
   2238   unsigned Latency = 0;
   2239   switch (Cand.Reason) {
   2240   default:
   2241     break;
   2242   case RegExcess:
   2243     P = Cand.RPDelta.Excess;
   2244     break;
   2245   case RegCritical:
   2246     P = Cand.RPDelta.CriticalMax;
   2247     break;
   2248   case RegMax:
   2249     P = Cand.RPDelta.CurrentMax;
   2250     break;
   2251   case ResourceReduce:
   2252     ResIdx = Cand.Policy.ReduceResIdx;
   2253     break;
   2254   case ResourceDemand:
   2255     ResIdx = Cand.Policy.DemandResIdx;
   2256     break;
   2257   case TopDepthReduce:
   2258     Latency = Cand.SU->getDepth();
   2259     break;
   2260   case TopPathReduce:
   2261     Latency = Cand.SU->getHeight();
   2262     break;
   2263   case BotHeightReduce:
   2264     Latency = Cand.SU->getHeight();
   2265     break;
   2266   case BotPathReduce:
   2267     Latency = Cand.SU->getDepth();
   2268     break;
   2269   }
   2270   dbgs() << "  SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
   2271   if (P.isValid())
   2272     dbgs() << " " << TRI->getRegPressureSetName(P.PSetID)
   2273            << ":" << P.UnitIncrease << " ";
   2274   else
   2275     dbgs() << "      ";
   2276   if (ResIdx)
   2277     dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " ";
   2278   else
   2279     dbgs() << "         ";
   2280   if (Latency)
   2281     dbgs() << " " << Latency << " cycles ";
   2282   else
   2283     dbgs() << "          ";
   2284   dbgs() << '\n';
   2285 }
   2286 #endif
   2287 
   2288 /// Pick the best candidate from the top queue.
   2289 ///
   2290 /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
   2291 /// DAG building. To adjust for the current scheduling location we need to
   2292 /// maintain the number of vreg uses remaining to be top-scheduled.
   2293 void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone,
   2294                                             const RegPressureTracker &RPTracker,
   2295                                             SchedCandidate &Cand) {
   2296   ReadyQueue &Q = Zone.Available;
   2297 
   2298   DEBUG(Q.dump());
   2299 
   2300   // getMaxPressureDelta temporarily modifies the tracker.
   2301   RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
   2302 
   2303   for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
   2304 
   2305     SchedCandidate TryCand(Cand.Policy);
   2306     TryCand.SU = *I;
   2307     tryCandidate(Cand, TryCand, Zone, RPTracker, TempTracker);
   2308     if (TryCand.Reason != NoCand) {
   2309       // Initialize resource delta if needed in case future heuristics query it.
   2310       if (TryCand.ResDelta == SchedResourceDelta())
   2311         TryCand.initResourceDelta(DAG, SchedModel);
   2312       Cand.setBest(TryCand);
   2313       DEBUG(traceCandidate(Cand));
   2314     }
   2315   }
   2316 }
   2317 
   2318 static void tracePick(const ConvergingScheduler::SchedCandidate &Cand,
   2319                       bool IsTop) {
   2320   DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ")
   2321         << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n');
   2322 }
   2323 
   2324 /// Pick the best candidate node from either the top or bottom queue.
   2325 SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) {
   2326   // Schedule as far as possible in the direction of no choice. This is most
   2327   // efficient, but also provides the best heuristics for CriticalPSets.
   2328   if (SUnit *SU = Bot.pickOnlyChoice()) {
   2329     IsTopNode = false;
   2330     DEBUG(dbgs() << "Pick Bot NOCAND\n");
   2331     return SU;
   2332   }
   2333   if (SUnit *SU = Top.pickOnlyChoice()) {
   2334     IsTopNode = true;
   2335     DEBUG(dbgs() << "Pick Top NOCAND\n");
   2336     return SU;
   2337   }
   2338   CandPolicy NoPolicy;
   2339   SchedCandidate BotCand(NoPolicy);
   2340   SchedCandidate TopCand(NoPolicy);
   2341   Bot.setPolicy(BotCand.Policy, Top);
   2342   Top.setPolicy(TopCand.Policy, Bot);
   2343 
   2344   // Prefer bottom scheduling when heuristics are silent.
   2345   pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
   2346   assert(BotCand.Reason != NoCand && "failed to find the first candidate");
   2347 
   2348   // If either Q has a single candidate that provides the least increase in
   2349   // Excess pressure, we can immediately schedule from that Q.
   2350   //
   2351   // RegionCriticalPSets summarizes the pressure within the scheduled region and
   2352   // affects picking from either Q. If scheduling in one direction must
   2353   // increase pressure for one of the excess PSets, then schedule in that
   2354   // direction first to provide more freedom in the other direction.
   2355   if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess))
   2356       || (BotCand.Reason == RegCritical
   2357           && !BotCand.isRepeat(RegCritical)))
   2358   {
   2359     IsTopNode = false;
   2360     tracePick(BotCand, IsTopNode);
   2361     return BotCand.SU;
   2362   }
   2363   // Check if the top Q has a better candidate.
   2364   pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
   2365   assert(TopCand.Reason != NoCand && "failed to find the first candidate");
   2366 
   2367   // Choose the queue with the most important (lowest enum) reason.
   2368   if (TopCand.Reason < BotCand.Reason) {
   2369     IsTopNode = true;
   2370     tracePick(TopCand, IsTopNode);
   2371     return TopCand.SU;
   2372   }
   2373   // Otherwise prefer the bottom candidate, in node order if all else failed.
   2374   IsTopNode = false;
   2375   tracePick(BotCand, IsTopNode);
   2376   return BotCand.SU;
   2377 }
   2378 
   2379 /// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
   2380 SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
   2381   if (DAG->top() == DAG->bottom()) {
   2382     assert(Top.Available.empty() && Top.Pending.empty() &&
   2383            Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
   2384     return NULL;
   2385   }
   2386   SUnit *SU;
   2387   do {
   2388     if (ForceTopDown) {
   2389       SU = Top.pickOnlyChoice();
   2390       if (!SU) {
   2391         CandPolicy NoPolicy;
   2392         SchedCandidate TopCand(NoPolicy);
   2393         pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand);
   2394         assert(TopCand.Reason != NoCand && "failed to find the first candidate");
   2395         SU = TopCand.SU;
   2396       }
   2397       IsTopNode = true;
   2398     }
   2399     else if (ForceBottomUp) {
   2400       SU = Bot.pickOnlyChoice();
   2401       if (!SU) {
   2402         CandPolicy NoPolicy;
   2403         SchedCandidate BotCand(NoPolicy);
   2404         pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand);
   2405         assert(BotCand.Reason != NoCand && "failed to find the first candidate");
   2406         SU = BotCand.SU;
   2407       }
   2408       IsTopNode = false;
   2409     }
   2410     else {
   2411       SU = pickNodeBidirectional(IsTopNode);
   2412     }
   2413   } while (SU->isScheduled);
   2414 
   2415   if (SU->isTopReady())
   2416     Top.removeReady(SU);
   2417   if (SU->isBottomReady())
   2418     Bot.removeReady(SU);
   2419 
   2420   DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
   2421   return SU;
   2422 }
   2423 
   2424 void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) {
   2425 
   2426   MachineBasicBlock::iterator InsertPos = SU->getInstr();
   2427   if (!isTop)
   2428     ++InsertPos;
   2429   SmallVectorImpl<SDep> &Deps = isTop ? SU->Preds : SU->Succs;
   2430 
   2431   // Find already scheduled copies with a single physreg dependence and move
   2432   // them just above the scheduled instruction.
   2433   for (SmallVectorImpl<SDep>::iterator I = Deps.begin(), E = Deps.end();
   2434        I != E; ++I) {
   2435     if (I->getKind() != SDep::Data || !TRI->isPhysicalRegister(I->getReg()))
   2436       continue;
   2437     SUnit *DepSU = I->getSUnit();
   2438     if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
   2439       continue;
   2440     MachineInstr *Copy = DepSU->getInstr();
   2441     if (!Copy->isCopy())
   2442       continue;
   2443     DEBUG(dbgs() << "  Rescheduling physreg copy ";
   2444           I->getSUnit()->dump(DAG));
   2445     DAG->moveInstruction(Copy, InsertPos);
   2446   }
   2447 }
   2448 
   2449 /// Update the scheduler's state after scheduling a node. This is the same node
   2450 /// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
    2451 /// its state based on the current cycle before MachineSchedStrategy does.
   2452 ///
   2453 /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling
   2454 /// them here. See comments in biasPhysRegCopy.
   2455 void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   2456   if (IsTopNode) {
   2457     SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle);
   2458     Top.bumpNode(SU);
   2459     if (SU->hasPhysRegUses)
   2460       reschedulePhysRegCopies(SU, true);
   2461   }
   2462   else {
   2463     SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle);
   2464     Bot.bumpNode(SU);
   2465     if (SU->hasPhysRegDefs)
   2466       reschedulePhysRegCopies(SU, false);
   2467   }
   2468 }
   2469 
   2470 /// Create the standard converging machine scheduler. This will be used as the
   2471 /// default scheduler if the target does not set a default.
   2472 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   2473   assert((!ForceTopDown || !ForceBottomUp) &&
   2474          "-misched-topdown incompatible with -misched-bottomup");
   2475   ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler());
   2476   // Register DAG post-processors.
   2477   //
   2478   // FIXME: extend the mutation API to allow earlier mutations to instantiate
   2479   // data and pass it to later mutations. Have a single mutation that gathers
   2480   // the interesting nodes in one pass.
   2481   DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
   2482   if (EnableLoadCluster)
   2483     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
   2484   if (EnableMacroFusion)
   2485     DAG->addMutation(new MacroFusion(DAG->TII));
   2486   return DAG;
   2487 }
   2488 static MachineSchedRegistry
   2489 ConvergingSchedRegistry("converge", "Standard converging scheduler.",
   2490                         createConvergingSched);
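
         // Other strategies can be registered the same way. A minimal sketch (the
         // "myconverge" name and the factory below are hypothetical, shown only to
         // illustrate the registry pattern used throughout this file):
         //
         //   static ScheduleDAGInstrs *createMySched(MachineSchedContext *C) {
         //     // Reuse the standard DAG driver with any MachineSchedStrategy.
         //     return new ScheduleDAGMI(C, new ConvergingScheduler());
         //   }
         //   static MachineSchedRegistry
         //   MySchedRegistry("myconverge", "Example converging scheduler.",
         //                   createMySched);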
   2491 
   2492 //===----------------------------------------------------------------------===//
   2493 // ILP Scheduler. Currently for experimental analysis of heuristics.
   2494 //===----------------------------------------------------------------------===//
   2495 
   2496 namespace {
   2497 /// \brief Order nodes by the ILP metric.
   2498 struct ILPOrder {
   2499   const SchedDFSResult *DFSResult;
   2500   const BitVector *ScheduledTrees;
   2501   bool MaximizeILP;
   2502 
   2503   ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {}
   2504 
   2505   /// \brief Apply a less-than relation on node priority.
   2506   ///
   2507   /// (Return true if A comes after B in the Q.)
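           ///
           /// Because the ready queue is kept as a heap with this comparator as the
           /// less-than relation, std::pop_heap yields the highest-priority node:
           /// nodes in already-scheduled or deeper subtrees first, then (when
           /// MaximizeILP is set) the node with the larger ILP value.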
   2508   bool operator()(const SUnit *A, const SUnit *B) const {
   2509     unsigned SchedTreeA = DFSResult->getSubtreeID(A);
   2510     unsigned SchedTreeB = DFSResult->getSubtreeID(B);
   2511     if (SchedTreeA != SchedTreeB) {
   2512       // Unscheduled trees have lower priority.
   2513       if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB))
   2514         return ScheduledTrees->test(SchedTreeB);
   2515 
    2516       // Trees with shallower connections have lower priority.
   2517       if (DFSResult->getSubtreeLevel(SchedTreeA)
   2518           != DFSResult->getSubtreeLevel(SchedTreeB)) {
   2519         return DFSResult->getSubtreeLevel(SchedTreeA)
   2520           < DFSResult->getSubtreeLevel(SchedTreeB);
   2521       }
   2522     }
   2523     if (MaximizeILP)
   2524       return DFSResult->getILP(A) < DFSResult->getILP(B);
   2525     else
   2526       return DFSResult->getILP(A) > DFSResult->getILP(B);
   2527   }
   2528 };
   2529 
   2530 /// \brief Schedule based on the ILP metric.
   2531 class ILPScheduler : public MachineSchedStrategy {
   2532   /// In case all subtrees are eventually connected to a common root through
   2533   /// data dependence (e.g. reduction), place an upper limit on their size.
   2534   ///
   2535   /// FIXME: A subtree limit is generally good, but in the situation commented
   2536   /// above, where multiple similar subtrees feed a common root, we should
   2537   /// only split at a point where the resulting subtrees will be balanced.
   2538   /// (a motivating test case must be found).
   2539   static const unsigned SubtreeLimit = 16;
   2540 
   2541   ScheduleDAGMI *DAG;
   2542   ILPOrder Cmp;
   2543 
   2544   std::vector<SUnit*> ReadyQ;
   2545 public:
   2546   ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {}
   2547 
   2548   virtual void initialize(ScheduleDAGMI *dag) {
   2549     DAG = dag;
   2550     DAG->computeDFSResult();
   2551     Cmp.DFSResult = DAG->getDFSResult();
   2552     Cmp.ScheduledTrees = &DAG->getScheduledTrees();
   2553     ReadyQ.clear();
   2554   }
   2555 
   2556   virtual void registerRoots() {
   2557     // Restore the heap in ReadyQ with the updated DFS results.
   2558     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   2559   }
   2560 
   2561   /// Implement MachineSchedStrategy interface.
   2562   /// -----------------------------------------
   2563 
   2564   /// Callback to select the highest priority node from the ready Q.
   2565   virtual SUnit *pickNode(bool &IsTopNode) {
   2566     if (ReadyQ.empty()) return NULL;
   2567     std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   2568     SUnit *SU = ReadyQ.back();
   2569     ReadyQ.pop_back();
   2570     IsTopNode = false;
   2571     DEBUG(dbgs() << "Pick node " << "SU(" << SU->NodeNum << ") "
   2572           << " ILP: " << DAG->getDFSResult()->getILP(SU)
   2573           << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @"
   2574           << DAG->getDFSResult()->getSubtreeLevel(
   2575             DAG->getDFSResult()->getSubtreeID(SU)) << '\n'
   2576           << "Scheduling " << *SU->getInstr());
   2577     return SU;
   2578   }
   2579 
   2580   /// \brief Scheduler callback to notify that a new subtree is scheduled.
   2581   virtual void scheduleTree(unsigned SubtreeID) {
   2582     std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   2583   }
   2584 
   2585   /// Callback after a node is scheduled. Mark a newly scheduled tree, notify
   2586   /// DFSResults, and resort the priority Q.
   2587   virtual void schedNode(SUnit *SU, bool IsTopNode) {
   2588     assert(!IsTopNode && "SchedDFSResult needs bottom-up");
   2589   }
   2590 
   2591   virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ }
   2592 
   2593   virtual void releaseBottomNode(SUnit *SU) {
   2594     ReadyQ.push_back(SU);
   2595     std::push_heap(ReadyQ.begin(), ReadyQ.end(), Cmp);
   2596   }
   2597 };
   2598 } // namespace
   2599 
   2600 static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
   2601   return new ScheduleDAGMI(C, new ILPScheduler(true));
   2602 }
   2603 static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
   2604   return new ScheduleDAGMI(C, new ILPScheduler(false));
   2605 }
   2606 static MachineSchedRegistry ILPMaxRegistry(
   2607   "ilpmax", "Schedule bottom-up for max ILP", createILPMaxScheduler);
   2608 static MachineSchedRegistry ILPMinRegistry(
   2609   "ilpmin", "Schedule bottom-up for min ILP", createILPMinScheduler);
   2610 
   2611 //===----------------------------------------------------------------------===//
   2612 // Machine Instruction Shuffler for Correctness Testing
   2613 //===----------------------------------------------------------------------===//
   2614 
   2615 #ifndef NDEBUG
   2616 namespace {
   2617 /// Apply a less-than relation on the node order, which corresponds to the
   2618 /// instruction order prior to scheduling. IsReverse implements greater-than.
   2619 template<bool IsReverse>
   2620 struct SUnitOrder {
   2621   bool operator()(SUnit *A, SUnit *B) const {
   2622     if (IsReverse)
   2623       return A->NodeNum > B->NodeNum;
   2624     else
   2625       return A->NodeNum < B->NodeNum;
   2626   }
   2627 };
   2628 
   2629 /// Reorder instructions as much as possible.
   2630 class InstructionShuffler : public MachineSchedStrategy {
   2631   bool IsAlternating;
   2632   bool IsTopDown;
   2633 
   2634   // Using a less-than relation (SUnitOrder<false>) for the TopQ priority
   2635   // gives nodes with a higher number higher priority, causing the latest
   2636   // instructions to be scheduled first.
   2637   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<false> >
   2638     TopQ;
   2639   // When scheduling bottom-up, use greater-than as the queue priority.
   2640   PriorityQueue<SUnit*, std::vector<SUnit*>, SUnitOrder<true> >
   2641     BottomQ;
   2642 public:
   2643   InstructionShuffler(bool alternate, bool topdown)
   2644     : IsAlternating(alternate), IsTopDown(topdown) {}
   2645 
   2646   virtual void initialize(ScheduleDAGMI *) {
   2647     TopQ.clear();
   2648     BottomQ.clear();
   2649   }
   2650 
   2651   /// Implement MachineSchedStrategy interface.
   2652   /// -----------------------------------------
   2653 
   2654   virtual SUnit *pickNode(bool &IsTopNode) {
   2655     SUnit *SU;
   2656     if (IsTopDown) {
   2657       do {
   2658         if (TopQ.empty()) return NULL;
   2659         SU = TopQ.top();
   2660         TopQ.pop();
   2661       } while (SU->isScheduled);
   2662       IsTopNode = true;
   2663     }
   2664     else {
   2665       do {
   2666         if (BottomQ.empty()) return NULL;
   2667         SU = BottomQ.top();
   2668         BottomQ.pop();
   2669       } while (SU->isScheduled);
   2670       IsTopNode = false;
   2671     }
   2672     if (IsAlternating)
   2673       IsTopDown = !IsTopDown;
   2674     return SU;
   2675   }
   2676 
   2677   virtual void schedNode(SUnit *SU, bool IsTopNode) {}
   2678 
   2679   virtual void releaseTopNode(SUnit *SU) {
   2680     TopQ.push(SU);
   2681   }
   2682   virtual void releaseBottomNode(SUnit *SU) {
   2683     BottomQ.push(SU);
   2684   }
   2685 };
   2686 } // namespace
   2687 
   2688 static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
   2689   bool Alternate = !ForceTopDown && !ForceBottomUp;
   2690   bool TopDown = !ForceBottomUp;
   2691   assert((TopDown || !ForceTopDown) &&
   2692          "-misched-topdown incompatible with -misched-bottomup");
   2693   return new ScheduleDAGMI(C, new InstructionShuffler(Alternate, TopDown));
   2694 }
   2695 static MachineSchedRegistry ShufflerRegistry(
   2696   "shuffle", "Shuffle machine instructions alternating directions",
   2697   createInstructionShuffler);
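        // As with the ILP strategies above, the shuffler can be requested with
        // something like "llc -misched=shuffle" (an illustrative invocation);
        // because this registration sits under the NDEBUG guard, the value is
        // only available in asserts-enabled builds.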
   2698 #endif // !NDEBUG
   2699 
   2700 //===----------------------------------------------------------------------===//
   2701 // GraphWriter support for ScheduleDAGMI.
   2702 //===----------------------------------------------------------------------===//
   2703 
   2704 #ifndef NDEBUG
   2705 namespace llvm {
   2706 
   2707 template<> struct GraphTraits<
   2708   ScheduleDAGMI*> : public GraphTraits<ScheduleDAG*> {};
   2709 
   2710 template<>
   2711 struct DOTGraphTraits<ScheduleDAGMI*> : public DefaultDOTGraphTraits {
   2712 
   2713   DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
   2714 
   2715   static std::string getGraphName(const ScheduleDAG *G) {
   2716     return G->MF.getName();
   2717   }
   2718 
   2719   static bool renderGraphFromBottomUp() {
   2720     return true;
   2721   }
   2722 
   2723   static bool isNodeHidden(const SUnit *Node) {
   2724     return (Node->NumPreds > 10 || Node->NumSuccs > 10);
   2725   }
   2726 
   2727   static bool hasNodeAddressLabel(const SUnit *Node,
   2728                                   const ScheduleDAG *Graph) {
   2729     return false;
   2730   }
   2731 
   2732   /// If you want to override the dot attributes printed for a particular
   2733   /// edge, override this method.
   2734   static std::string getEdgeAttributes(const SUnit *Node,
   2735                                        SUnitIterator EI,
   2736                                        const ScheduleDAG *Graph) {
   2737     if (EI.isArtificialDep())
   2738       return "color=cyan,style=dashed";
   2739     if (EI.isCtrlDep())
   2740       return "color=blue,style=dashed";
   2741     return "";
   2742   }
   2743 
   2744   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
   2745     std::string Str;
   2746     raw_string_ostream SS(Str);
   2747     SS << "SU(" << SU->NodeNum << ')';
   2748     return SS.str();
   2749   }
   2750   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
   2751     return G->getGraphNodeLabel(SU);
   2752   }
   2753 
   2754   static std::string getNodeAttributes(const SUnit *N,
   2755                                        const ScheduleDAG *Graph) {
   2756     std::string Str("shape=Mrecord");
   2757     const SchedDFSResult *DFS =
   2758       static_cast<const ScheduleDAGMI*>(Graph)->getDFSResult();
   2759     if (DFS) {
   2760       Str += ",style=filled,fillcolor=\"#";
   2761       Str += DOT::getColorString(DFS->getSubtreeID(N));
   2762       Str += '"';
   2763     }
   2764     return Str;
   2765   }
   2766 };
   2767 } // namespace llvm
   2768 #endif // NDEBUG
   2769 
   2770 /// viewGraph - Pop up a ghostview window with the reachable parts of the DAG
   2771 /// rendered using 'dot'.
   2772 ///
   2773 void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) {
   2774 #ifndef NDEBUG
   2775   ViewGraph(this, Name, false, Title);
   2776 #else
   2777   errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on "
   2778          << "systems with Graphviz or gv!\n";
   2779 #endif  // NDEBUG
   2780 }
   2781 
   2782 /// Out-of-line implementation with no arguments is handy for gdb.
   2783 void ScheduleDAGMI::viewGraph() {
   2784   viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName());
   2785 }
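        // For example, from a debugger the current DAG can be rendered with
        // something along the lines of:
        //   (gdb) call DAG->viewGraph()
        // where "DAG" stands for whatever ScheduleDAGMI pointer is in scope (a
        // hypothetical variable name, used here only for illustration).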
   2786