//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements bottom-up and top-down register pressure reduction list
// schedulers, using standard algorithms.  The basic approach uses a priority
// queue of available nodes to schedule.  One at a time, nodes are taken from
// the priority queue (thus in priority order), checked for legality to
// schedule, and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  tdrListrDAGScheduler("list-tdrr",
                       "Top-down register reduction list scheduling",
                       createTDRRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
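// These can be toggled from the llc command line; an illustrative (assumed)
// invocation would be:
//   llc -pre-RA-sched=list-ilp -disable-sched-critical-path ...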
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

#ifndef NDEBUG
namespace {
  // For sched=list-ilp, count the number of times each factor comes into play.
  enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
         FactStatic, FactOther, NumFactors };
}
static const char *FactorName[NumFactors] =
  {"PressureDiff", "RegUses", "Stall", "Height", "Depth", "Static", "Other"};
static int FactorCount[NumFactors];
#endif //!NDEBUG

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation.  This supports both top-down and bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// isBottomUp - This is true if the scheduling problem is bottom-up, false if
  /// it is top-down.
  bool isBottomUp;

  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation).  Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions
  /// that are "live".
  /// These nodes must be scheduled before any other node that
  /// modifies the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - Adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - Removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
  void ReleaseSuccessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  void ScheduleNodeTopDown(SUnit*);
  void ListScheduleTopDown();


  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = NewSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// ForceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool ForceUnitLatencies() const {
    return !NeedLatency;
  }
};
} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost) {
  EVT VT = RegDefPos.GetValue();

  // Special handling for untyped values.  These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::untyped) {
    const SDNode *Node = RegDefPos.GetNode();
    unsigned Opcode = Node->getMachineOpcode();

    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");
#ifndef NDEBUG
  for (int i = 0; i < NumFactors; ++i) {
    FactorCount[i] = 0;
  }
#endif //!NDEBUG

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  LiveRegDefs.resize(TRI->getNumRegs(), NULL);
  LiveRegGens.resize(TRI->getNumRegs(), NULL);

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
  if (isBottomUp)
    ListScheduleBottomUp();
  else
    ListScheduleTopDown();

#ifndef NDEBUG
  for (int i = 0; i < NumFactors; ++i) {
    DEBUG(dbgs() << FactorName[i] << "\t" << FactorCount[i] << "\n");
  }
#endif // !NDEBUG
  AvailableQueue->releaseState();
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!ForceUnitLatencies()) {
    // Update the predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle =
      isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      if (isBottomUp)
        HazardRec->RecedeCycle();
      else
        HazardRec->AdvanceCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (isBottomUp && SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (isBottomUp && SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);

  if (!isBottomUp && SU->isCall) {
    HazardRec->Reset();
  }
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << " Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node, its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->ScheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule. Update its state
/// and the states of its predecessors to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      // If the register was not previously live, account for it now. The check
      // must happen before the def below overwrites the entry.
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->UnscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to an earlier cycle by
/// unscheduling nodes from the end of the sequence until BtSU is
/// unscheduled, so that SU can be scheduled.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location that produces the same type of value
    // but with different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      ComputeLatency(LoadSU);
    }

    SUnit *NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    ComputeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // The new SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from the old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - Update the live register vector if the specified
/// register def of the specified SUnit clobbers any "live" registers,
/// recording the interfering registers in LRegs.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps;  // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true;  // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try to
    // duplicate the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to copy
    // the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies. These can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}

//===----------------------------------------------------------------------===//
//  Top-Down Scheduling
//===----------------------------------------------------------------------===//

/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) {
  SUnit *SuccSU = SuccEdge->getSUnit();

#ifndef NDEBUG
  if (SuccSU->NumPredsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    SuccSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --SuccSU->NumPredsLeft;

  // If all the node's predecessors are scheduled, this node is ready
  // to be scheduled. Ignore the special ExitSU node.
  if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) {
    SuccSU->isAvailable = true;
    AvailableQueue->push(SuccSU);
  }
}

void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
  // Top down: release successors
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    assert(!I->isAssignedRegDep() &&
           "The list-tdrr scheduler doesn't yet support physreg dependencies!");

    ReleaseSucc(SU, &*I);
  }
}

/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
  DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

  assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
  SU->setDepthToAtLeast(CurCycle);
  Sequence.push_back(SU);

  ReleaseSuccessors(SU);
  SU->isScheduled = true;
  AvailableQueue->ScheduledNode(SU);
}

/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
  AvailableQueue->setCurCycle(CurCycle);

  // Release any successors of the special Entry node.
  ReleaseSuccessors(&EntrySU);

  // Add all leaves to the Available queue.
  for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
    // It is available if it has no predecessors.
    if (SUnits[i].Preds.empty()) {
      AvailableQueue->push(&SUnits[i]);
      SUnits[i].isAvailable = true;
    }
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    SUnit *CurSU = AvailableQueue->pop();

    if (CurSU)
      ScheduleNodeTopDown(CurSU);
    ++CurCycle;
    AvailableQueue->setCurCycle(CurCycle);
  }

#ifndef NDEBUG
  VerifySchedule(isBottomUp);
#endif
}


//===----------------------------------------------------------------------===//
//  RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
//
namespace {
class RegReductionPQBase;

struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

#ifndef NDEBUG
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;
  reverse_sort(SF &sf) : SortFunc(sf) {}
  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif // NDEBUG

/// bu_ls_rr_sort - Priority function for bottom up register pressure
/// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// td_ls_rr_sort - Priority function for top down register pressure reduction
// scheduler.
struct td_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = false,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(const SUnit* left, const SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  src_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  ilp_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit*> Queue;
  unsigned CurQueueId;
  bool TracksRegPressure;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  ///
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
    Queue.push_back(U);
  }

  void remove(SUnit *SU) {
    assert(!Queue.empty() && "Queue is empty!");
    assert(SU->NodeQueueId != 0 && "Not in queue!");
    std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
                                                 SU);
    if (I != prior(Queue.end()))
      std::swap(*I, Queue.back());
    Queue.pop_back();
    SU->NodeQueueId = 0;
  }

  bool tracksRegPressure() const { return TracksRegPressure; }

  void dumpRegPressure() const;

  bool HighRegPressure(const SUnit *SU) const;

  bool MayReduceRegPressure(SUnit *SU) const;

  int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const;

  void ScheduledNode(SUnit *SU);

  void UnscheduledNode(SUnit *SU);

protected:
  bool canClobber(const SUnit *SU, const SUnit *Op);
  void AddPseudoTwoAddrDeps();
  void PrescheduleNodesWithMultipleUses();
  void CalculateSethiUllmanNumbers();
};

template<class SF>
static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
  std::vector<SUnit *>::iterator Best = Q.begin();
  for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
         E = Q.end(); I != E; ++I)
    if (Picker(*Best, *I))
      Best = I;
  SUnit *V = *Best;
  if (Best != prior(Q.end()))
    std::swap(*Best, Q.back());
  Q.pop_back();
  return V;
}

template<class SF>
SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
#ifndef NDEBUG
  if (DAG->StressSched) {
    reverse_sort<SF> RPicker(Picker);
    return popFromQueueImpl(Q, RPicker);
  }
#endif
  (void)DAG;
  return popFromQueueImpl(Q, Picker);
}

template<class SF>
class RegReductionPriorityQueue : public RegReductionPQBase {
RegReductionPQBase { 1652 SF Picker; 1653 1654 public: 1655 RegReductionPriorityQueue(MachineFunction &mf, 1656 bool tracksrp, 1657 const TargetInstrInfo *tii, 1658 const TargetRegisterInfo *tri, 1659 const TargetLowering *tli) 1660 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli), 1661 Picker(this) {} 1662 1663 bool isBottomUp() const { return SF::IsBottomUp; } 1664 1665 bool isReady(SUnit *U) const { 1666 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 1667 } 1668 1669 SUnit *pop() { 1670 if (Queue.empty()) return NULL; 1671 1672 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 1673 V->NodeQueueId = 0; 1674 return V; 1675 } 1676 1677 void dump(ScheduleDAG *DAG) const { 1678 // Emulate pop() without clobbering NodeQueueIds. 1679 std::vector<SUnit*> DumpQueue = Queue; 1680 SF DumpPicker = Picker; 1681 while (!DumpQueue.empty()) { 1682 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 1683 if (isBottomUp()) 1684 dbgs() << "Height " << SU->getHeight() << ": "; 1685 else 1686 dbgs() << "Depth " << SU->getDepth() << ": "; 1687 SU->dump(DAG); 1688 } 1689 } 1690 }; 1691 1692 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1693 BURegReductionPriorityQueue; 1694 1695 typedef RegReductionPriorityQueue<td_ls_rr_sort> 1696 TDRegReductionPriorityQueue; 1697 1698 typedef RegReductionPriorityQueue<src_ls_rr_sort> 1699 SrcRegReductionPriorityQueue; 1700 1701 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1702 HybridBURRPriorityQueue; 1703 1704 typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 1705 ILPBURRPriorityQueue; 1706 } // end anonymous namespace 1707 1708 //===----------------------------------------------------------------------===// 1709 // Static Node Priority for Register Pressure Reduction 1710 //===----------------------------------------------------------------------===// 1711 1712 // Check for special nodes that bypass scheduling heuristics. 1713 // Currently this pushes TokenFactor nodes down, but may be used for other 1714 // pseudo-ops as well. 1715 // 1716 // Return -1 to schedule right above left, 1 for left above right. 1717 // Return 0 if no bias exists. 1718 static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 1719 bool LSchedLow = left->isScheduleLow; 1720 bool RSchedLow = right->isScheduleLow; 1721 if (LSchedLow != RSchedLow) 1722 return LSchedLow < RSchedLow ? 1 : -1; 1723 return 0; 1724 } 1725 1726 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1727 /// Smaller number is the higher priority. 1728 static unsigned 1729 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1730 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1731 if (SethiUllmanNumber != 0) 1732 return SethiUllmanNumber; 1733 1734 unsigned Extra = 0; 1735 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1736 I != E; ++I) { 1737 if (I->isCtrl()) continue; // ignore chain preds 1738 SUnit *PredSU = I->getSUnit(); 1739 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1740 if (PredSethiUllman > SethiUllmanNumber) { 1741 SethiUllmanNumber = PredSethiUllman; 1742 Extra = 0; 1743 } else if (PredSethiUllman == SethiUllmanNumber) 1744 ++Extra; 1745 } 1746 1747 SethiUllmanNumber += Extra; 1748 1749 if (SethiUllmanNumber == 0) 1750 SethiUllmanNumber = 1; 1751 1752 return SethiUllmanNumber; 1753 } 1754 1755 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1756 /// scheduling units. 
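///
/// For example, for the balanced expression (a+b)+(c+d) the leaves get
/// number 1, each inner add gets 2 (two equal operands of 1), and the root
/// gets 3, while the skewed chain ((a+b)+c)+d never exceeds 2. A larger
/// number marks a subtree that needs more values in flight at once; since a
/// smaller number means higher priority, such subtrees are popped later by
/// the bottom-up scheduler and so end up earlier in the final instruction
/// order.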
1757 void RegReductionPQBase::CalculateSethiUllmanNumbers() { 1758 SethiUllmanNumbers.assign(SUnits->size(), 0); 1759 1760 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1761 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1762 } 1763 1764 void RegReductionPQBase::addNode(const SUnit *SU) { 1765 unsigned SUSize = SethiUllmanNumbers.size(); 1766 if (SUnits->size() > SUSize) 1767 SethiUllmanNumbers.resize(SUSize*2, 0); 1768 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1769 } 1770 1771 void RegReductionPQBase::updateNode(const SUnit *SU) { 1772 SethiUllmanNumbers[SU->NodeNum] = 0; 1773 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1774 } 1775 1776 // Lower priority means schedule further down. For bottom-up scheduling, lower 1777 // priority SUs are scheduled before higher priority SUs. 1778 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 1779 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1780 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 1781 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 1782 // CopyToReg should be close to its uses to facilitate coalescing and 1783 // avoid spilling. 1784 return 0; 1785 if (Opc == TargetOpcode::EXTRACT_SUBREG || 1786 Opc == TargetOpcode::SUBREG_TO_REG || 1787 Opc == TargetOpcode::INSERT_SUBREG) 1788 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 1789 // close to their uses to facilitate coalescing. 1790 return 0; 1791 if (SU->NumSuccs == 0 && SU->NumPreds != 0) 1792 // If SU does not have a register use, i.e. it doesn't produce a value 1793 // that would be consumed (e.g. store), then it terminates a chain of 1794 // computation. Give it a large SethiUllman number so it will be 1795 // scheduled right before its predecessors that it doesn't lengthen 1796 // their live ranges. 1797 return 0xffff; 1798 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 1799 // If SU does not have a register def, schedule it close to its uses 1800 // because it does not lengthen any live ranges. 1801 return 0; 1802 #if 1 1803 return SethiUllmanNumbers[SU->NodeNum]; 1804 #else 1805 unsigned Priority = SethiUllmanNumbers[SU->NodeNum]; 1806 if (SU->isCallOp) { 1807 // FIXME: This assumes all of the defs are used as call operands. 1808 int NP = (int)Priority - SU->getNode()->getNumValues(); 1809 return (NP > 0) ? NP : 0; 1810 } 1811 return Priority; 1812 #endif 1813 } 1814 1815 //===----------------------------------------------------------------------===// 1816 // Register Pressure Tracking 1817 //===----------------------------------------------------------------------===// 1818 1819 void RegReductionPQBase::dumpRegPressure() const { 1820 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), 1821 E = TRI->regclass_end(); I != E; ++I) { 1822 const TargetRegisterClass *RC = *I; 1823 unsigned Id = RC->getID(); 1824 unsigned RP = RegPressure[Id]; 1825 if (!RP) continue; 1826 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id] 1827 << '\n'); 1828 } 1829 } 1830 1831 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const { 1832 if (!TLI) 1833 return false; 1834 1835 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1836 I != E; ++I) { 1837 if (I->isCtrl()) 1838 continue; 1839 SUnit *PredSU = I->getSUnit(); 1840 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1841 // to cover the number of registers defined (they are all live). 
1842 if (PredSU->NumRegDefsLeft == 0) { 1843 continue; 1844 } 1845 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1846 RegDefPos.IsValid(); RegDefPos.Advance()) { 1847 unsigned RCId, Cost; 1848 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); 1849 1850 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId]) 1851 return true; 1852 } 1853 } 1854 return false; 1855 } 1856 1857 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { 1858 const SDNode *N = SU->getNode(); 1859 1860 if (!N->isMachineOpcode() || !SU->NumSuccs) 1861 return false; 1862 1863 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1864 for (unsigned i = 0; i != NumDefs; ++i) { 1865 EVT VT = N->getValueType(i); 1866 if (!N->hasAnyUseOfValue(i)) 1867 continue; 1868 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1869 if (RegPressure[RCId] >= RegLimit[RCId]) 1870 return true; 1871 } 1872 return false; 1873 } 1874 1875 // Compute the register pressure contribution by this instruction by count up 1876 // for uses that are not live and down for defs. Only count register classes 1877 // that are already under high pressure. As a side effect, compute the number of 1878 // uses of registers that are already live. 1879 // 1880 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure 1881 // so could probably be factored. 1882 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { 1883 LiveUses = 0; 1884 int PDiff = 0; 1885 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 1886 I != E; ++I) { 1887 if (I->isCtrl()) 1888 continue; 1889 SUnit *PredSU = I->getSUnit(); 1890 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1891 // to cover the number of registers defined (they are all live). 1892 if (PredSU->NumRegDefsLeft == 0) { 1893 if (PredSU->getNode()->isMachineOpcode()) 1894 ++LiveUses; 1895 continue; 1896 } 1897 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1898 RegDefPos.IsValid(); RegDefPos.Advance()) { 1899 EVT VT = RegDefPos.GetValue(); 1900 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1901 if (RegPressure[RCId] >= RegLimit[RCId]) 1902 ++PDiff; 1903 } 1904 } 1905 const SDNode *N = SU->getNode(); 1906 1907 if (!N || !N->isMachineOpcode() || !SU->NumSuccs) 1908 return PDiff; 1909 1910 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 1911 for (unsigned i = 0; i != NumDefs; ++i) { 1912 EVT VT = N->getValueType(i); 1913 if (!N->hasAnyUseOfValue(i)) 1914 continue; 1915 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 1916 if (RegPressure[RCId] >= RegLimit[RCId]) 1917 --PDiff; 1918 } 1919 return PDiff; 1920 } 1921 1922 void RegReductionPQBase::ScheduledNode(SUnit *SU) { 1923 if (!TracksRegPressure) 1924 return; 1925 1926 if (!SU->getNode()) 1927 return; 1928 1929 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1930 I != E; ++I) { 1931 if (I->isCtrl()) 1932 continue; 1933 SUnit *PredSU = I->getSUnit(); 1934 // NumRegDefsLeft is zero when enough uses of this node have been scheduled 1935 // to cover the number of registers defined (they are all live). 1936 if (PredSU->NumRegDefsLeft == 0) { 1937 continue; 1938 } 1939 // FIXME: The ScheduleDAG currently loses information about which of a 1940 // node's values is consumed by each dependence. Consequently, if the node 1941 // defines multiple register classes, we don't know which to pressurize 1942 // here. 
Instead the following loop consumes the register defs in an 1943 // arbitrary order. At least it handles the common case of clustered loads 1944 // to the same class. For precise liveness, each SDep needs to indicate the 1945 // result number. But that tightly couples the ScheduleDAG with the 1946 // SelectionDAG making updates tricky. A simpler hack would be to attach a 1947 // value type or register class to SDep. 1948 // 1949 // The most important aspect of register tracking is balancing the increase 1950 // here with the reduction further below. Note that this SU may use multiple 1951 // defs in PredSU. The can't be determined here, but we've already 1952 // compensated by reducing NumRegDefsLeft in PredSU during 1953 // ScheduleDAGSDNodes::AddSchedEdges. 1954 --PredSU->NumRegDefsLeft; 1955 unsigned SkipRegDefs = PredSU->NumRegDefsLeft; 1956 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); 1957 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 1958 if (SkipRegDefs) 1959 continue; 1960 1961 unsigned RCId, Cost; 1962 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); 1963 RegPressure[RCId] += Cost; 1964 break; 1965 } 1966 } 1967 1968 // We should have this assert, but there may be dead SDNodes that never 1969 // materialize as SUnits, so they don't appear to generate liveness. 1970 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses"); 1971 int SkipRegDefs = (int)SU->NumRegDefsLeft; 1972 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG); 1973 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) { 1974 if (SkipRegDefs > 0) 1975 continue; 1976 unsigned RCId, Cost; 1977 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost); 1978 if (RegPressure[RCId] < Cost) { 1979 // Register pressure tracking is imprecise. This can happen. But we try 1980 // hard not to let it happen because it likely results in poor scheduling. 1981 DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n"); 1982 RegPressure[RCId] = 0; 1983 } 1984 else { 1985 RegPressure[RCId] -= Cost; 1986 } 1987 } 1988 dumpRegPressure(); 1989 } 1990 1991 void RegReductionPQBase::UnscheduledNode(SUnit *SU) { 1992 if (!TracksRegPressure) 1993 return; 1994 1995 const SDNode *N = SU->getNode(); 1996 if (!N) return; 1997 1998 if (!N->isMachineOpcode()) { 1999 if (N->getOpcode() != ISD::CopyToReg) 2000 return; 2001 } else { 2002 unsigned Opc = N->getMachineOpcode(); 2003 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2004 Opc == TargetOpcode::INSERT_SUBREG || 2005 Opc == TargetOpcode::SUBREG_TO_REG || 2006 Opc == TargetOpcode::REG_SEQUENCE || 2007 Opc == TargetOpcode::IMPLICIT_DEF) 2008 return; 2009 } 2010 2011 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2012 I != E; ++I) { 2013 if (I->isCtrl()) 2014 continue; 2015 SUnit *PredSU = I->getSUnit(); 2016 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only 2017 // counts data deps. 
2018 if (PredSU->NumSuccsLeft != PredSU->Succs.size()) 2019 continue; 2020 const SDNode *PN = PredSU->getNode(); 2021 if (!PN->isMachineOpcode()) { 2022 if (PN->getOpcode() == ISD::CopyFromReg) { 2023 EVT VT = PN->getValueType(0); 2024 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2025 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2026 } 2027 continue; 2028 } 2029 unsigned POpc = PN->getMachineOpcode(); 2030 if (POpc == TargetOpcode::IMPLICIT_DEF) 2031 continue; 2032 if (POpc == TargetOpcode::EXTRACT_SUBREG || 2033 POpc == TargetOpcode::INSERT_SUBREG || 2034 POpc == TargetOpcode::SUBREG_TO_REG) { 2035 EVT VT = PN->getValueType(0); 2036 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2037 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2038 continue; 2039 } 2040 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); 2041 for (unsigned i = 0; i != NumDefs; ++i) { 2042 EVT VT = PN->getValueType(i); 2043 if (!PN->hasAnyUseOfValue(i)) 2044 continue; 2045 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2046 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) 2047 // Register pressure tracking is imprecise. This can happen. 2048 RegPressure[RCId] = 0; 2049 else 2050 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); 2051 } 2052 } 2053 2054 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses() 2055 // may transfer data dependencies to CopyToReg. 2056 if (SU->NumSuccs && N->isMachineOpcode()) { 2057 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2058 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2059 EVT VT = N->getValueType(i); 2060 if (VT == MVT::Glue || VT == MVT::Other) 2061 continue; 2062 if (!N->hasAnyUseOfValue(i)) 2063 continue; 2064 unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); 2065 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); 2066 } 2067 } 2068 2069 dumpRegPressure(); 2070 } 2071 2072 //===----------------------------------------------------------------------===// 2073 // Dynamic Node Priority for Register Pressure Reduction 2074 //===----------------------------------------------------------------------===// 2075 2076 /// closestSucc - Returns the scheduled cycle of the successor which is 2077 /// closest to the current cycle. 2078 static unsigned closestSucc(const SUnit *SU) { 2079 unsigned MaxHeight = 0; 2080 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2081 I != E; ++I) { 2082 if (I->isCtrl()) continue; // ignore chain succs 2083 unsigned Height = I->getSUnit()->getHeight(); 2084 // If there are bunch of CopyToRegs stacked up, they should be considered 2085 // to be at the same position. 2086 if (I->getSUnit()->getNode() && 2087 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg) 2088 Height = closestSucc(I->getSUnit())+1; 2089 if (Height > MaxHeight) 2090 MaxHeight = Height; 2091 } 2092 return MaxHeight; 2093 } 2094 2095 /// calcMaxScratches - Returns an cost estimate of the worse case requirement 2096 /// for scratch registers, i.e. number of data dependencies. 2097 static unsigned calcMaxScratches(const SUnit *SU) { 2098 unsigned Scratches = 0; 2099 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2100 I != E; ++I) { 2101 if (I->isCtrl()) continue; // ignore chain preds 2102 Scratches++; 2103 } 2104 return Scratches; 2105 } 2106 2107 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are 2108 /// CopyFromReg from a virtual register. 
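/// A typical match is a single-block loop's induction-variable update, e.g.
/// x1 = op x0, c1 where x0 is only defined by a CopyFromReg of a value that
/// is live into the block.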
2109 static bool hasOnlyLiveInOpers(const SUnit *SU) { 2110 bool RetVal = false; 2111 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2112 I != E; ++I) { 2113 if (I->isCtrl()) continue; 2114 const SUnit *PredSU = I->getSUnit(); 2115 if (PredSU->getNode() && 2116 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { 2117 unsigned Reg = 2118 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); 2119 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2120 RetVal = true; 2121 continue; 2122 } 2123 } 2124 return false; 2125 } 2126 return RetVal; 2127 } 2128 2129 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are 2130 /// CopyToReg to a virtual register. This SU def is probably a liveout and 2131 /// it has no other use. It should be scheduled closer to the terminator. 2132 static bool hasOnlyLiveOutUses(const SUnit *SU) { 2133 bool RetVal = false; 2134 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2135 I != E; ++I) { 2136 if (I->isCtrl()) continue; 2137 const SUnit *SuccSU = I->getSUnit(); 2138 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 2139 unsigned Reg = 2140 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 2141 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2142 RetVal = true; 2143 continue; 2144 } 2145 } 2146 return false; 2147 } 2148 return RetVal; 2149 } 2150 2151 // Set isVRegCycle for a node with only live in opers and live out uses. Also 2152 // set isVRegCycle for its CopyFromReg operands. 2153 // 2154 // This is only relevant for single-block loops, in which case the VRegCycle 2155 // node is likely an induction variable in which the operand and target virtual 2156 // registers should be coalesced (e.g. pre/post increment values). Setting the 2157 // isVRegCycle flag helps the scheduler prioritize other uses of the same 2158 // CopyFromReg so that this node becomes the virtual register "kill". This 2159 // avoids interference between the values live in and out of the block and 2160 // eliminates a copy inside the loop. 2161 static void initVRegCycle(SUnit *SU) { 2162 if (DisableSchedVRegCycle) 2163 return; 2164 2165 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 2166 return; 2167 2168 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 2169 2170 SU->isVRegCycle = true; 2171 2172 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2173 I != E; ++I) { 2174 if (I->isCtrl()) continue; 2175 I->getSUnit()->isVRegCycle = true; 2176 } 2177 } 2178 2179 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 2180 // CopyFromReg operands. We should no longer penalize other uses of this VReg. 2181 static void resetVRegCycle(SUnit *SU) { 2182 if (!SU->isVRegCycle) 2183 return; 2184 2185 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 2186 I != E; ++I) { 2187 if (I->isCtrl()) continue; // ignore chain preds 2188 SUnit *PredSU = I->getSUnit(); 2189 if (PredSU->isVRegCycle) { 2190 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 2191 "VRegCycle def must be CopyFromReg"); 2192 I->getSUnit()->isVRegCycle = 0; 2193 } 2194 } 2195 } 2196 2197 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 2198 // means a node that defines the VRegCycle has not been scheduled yet. 2199 static bool hasVRegCycleUse(const SUnit *SU) { 2200 // If this SU also defines the VReg, don't hoist it as a "use". 
2201 if (SU->isVRegCycle) 2202 return false; 2203 2204 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 2205 I != E; ++I) { 2206 if (I->isCtrl()) continue; // ignore chain preds 2207 if (I->getSUnit()->isVRegCycle && 2208 I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) { 2209 DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n"); 2210 return true; 2211 } 2212 } 2213 return false; 2214 } 2215 2216 // Check for either a dependence (latency) or resource (hazard) stall. 2217 // 2218 // Note: The ScheduleHazardRecognizer interface requires a non-const SU. 2219 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) { 2220 if ((int)SPQ->getCurCycle() < Height) return true; 2221 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2222 != ScheduleHazardRecognizer::NoHazard) 2223 return true; 2224 return false; 2225 } 2226 2227 // Return -1 if left has higher priority, 1 if right has higher priority. 2228 // Return 0 if latency-based priority is equivalent. 2229 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, 2230 RegReductionPQBase *SPQ) { 2231 // Scheduling an instruction that uses a VReg whose postincrement has not yet 2232 // been scheduled will induce a copy. Model this as an extra cycle of latency. 2233 int LPenalty = hasVRegCycleUse(left) ? 1 : 0; 2234 int RPenalty = hasVRegCycleUse(right) ? 1 : 0; 2235 int LHeight = (int)left->getHeight() + LPenalty; 2236 int RHeight = (int)right->getHeight() + RPenalty; 2237 2238 bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && 2239 BUHasStall(left, LHeight, SPQ); 2240 bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && 2241 BUHasStall(right, RHeight, SPQ); 2242 2243 // If scheduling one of the node will cause a pipeline stall, delay it. 2244 // If scheduling either one of the node will cause a pipeline stall, sort 2245 // them according to their height. 2246 if (LStall) { 2247 if (!RStall) { 2248 DEBUG(++FactorCount[FactStall]); 2249 return 1; 2250 } 2251 if (LHeight != RHeight) { 2252 DEBUG(++FactorCount[FactStall]); 2253 return LHeight > RHeight ? 1 : -1; 2254 } 2255 } else if (RStall) { 2256 DEBUG(++FactorCount[FactStall]); 2257 return -1; 2258 } 2259 2260 // If either node is scheduling for latency, sort them by height/depth 2261 // and latency. 2262 if (!checkPref || (left->SchedulingPref == Sched::Latency || 2263 right->SchedulingPref == Sched::Latency)) { 2264 if (DisableSchedCycles) { 2265 if (LHeight != RHeight) { 2266 DEBUG(++FactorCount[FactHeight]); 2267 return LHeight > RHeight ? 1 : -1; 2268 } 2269 } 2270 else { 2271 // If neither instruction stalls (!LStall && !RStall) then 2272 // its height is already covered so only its depth matters. We also reach 2273 // this if both stall but have the same height. 2274 int LDepth = left->getDepth() - LPenalty; 2275 int RDepth = right->getDepth() - RPenalty; 2276 if (LDepth != RDepth) { 2277 DEBUG(++FactorCount[FactDepth]); 2278 DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum 2279 << ") depth " << LDepth << " vs SU (" << right->NodeNum 2280 << ") depth " << RDepth << "\n"); 2281 return LDepth < RDepth ? 1 : -1; 2282 } 2283 } 2284 if (left->Latency != right->Latency) { 2285 DEBUG(++FactorCount[FactOther]); 2286 return left->Latency > right->Latency ? 1 : -1; 2287 } 2288 } 2289 return 0; 2290 } 2291 2292 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { 2293 // Schedule physical register definitions close to their use. 
2294 // This is motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2295 // long as shortening physreg live ranges is generally good, we can defer
2296 // creating a subtarget hook.
2297 if (!DisableSchedPhysRegJoin) {
2298 bool LHasPhysReg = left->hasPhysRegDefs;
2299 bool RHasPhysReg = right->hasPhysRegDefs;
2300 if (LHasPhysReg != RHasPhysReg) {
2301 DEBUG(++FactorCount[FactRegUses]);
2302 #ifndef NDEBUG
2303 const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2304 #endif
2305 DEBUG(dbgs() << " SU (" << left->NodeNum << ") "
2306 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2307 << PhysRegMsg[RHasPhysReg] << "\n");
2308 return LHasPhysReg < RHasPhysReg;
2309 }
2310 }
2311
2312 // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2313 unsigned LPriority = SPQ->getNodePriority(left);
2314 unsigned RPriority = SPQ->getNodePriority(right);
2315
2316 // Be really careful about hoisting call operands above previous calls.
2317 // Only allow it if it would reduce register pressure.
2318 if (left->isCall && right->isCallOp) {
2319 unsigned RNumVals = right->getNode()->getNumValues();
2320 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2321 }
2322 if (right->isCall && left->isCallOp) {
2323 unsigned LNumVals = left->getNode()->getNumValues();
2324 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2325 }
2326
2327 if (LPriority != RPriority) {
2328 DEBUG(++FactorCount[FactStatic]);
2329 return LPriority > RPriority;
2330 }
2331
2332 // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2333 // the same, keep source order.
2334 if (left->isCall || right->isCall) {
2335 unsigned LOrder = SPQ->getNodeOrdering(left);
2336 unsigned ROrder = SPQ->getNodeOrdering(right);
2337
2338 // Prefer an ordering where the lower the non-zero order number, the higher
2339 // the preference.
2340 if ((LOrder || ROrder) && LOrder != ROrder)
2341 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2342 }
2343
2344 // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2345 // e.g.
2346 // t1 = op t2, c1
2347 // t3 = op t4, c2
2348 //
2349 // and the following instructions are both ready.
2350 // t2 = op c3
2351 // t4 = op c4
2352 //
2353 // Then schedule t2 = op first.
2354 // i.e.
2355 // t4 = op c4
2356 // t2 = op c3
2357 // t1 = op t2, c1
2358 // t3 = op t4, c2
2359 //
2360 // This creates more short live intervals.
2361 unsigned LDist = closestSucc(left);
2362 unsigned RDist = closestSucc(right);
2363 if (LDist != RDist) {
2364 DEBUG(++FactorCount[FactOther]);
2365 return LDist < RDist;
2366 }
2367
2368 // How many registers become live when the node is scheduled.
2369 unsigned LScratch = calcMaxScratches(left);
2370 unsigned RScratch = calcMaxScratches(right);
2371 if (LScratch != RScratch) {
2372 DEBUG(++FactorCount[FactOther]);
2373 return LScratch > RScratch;
2374 }
2375
2376 // Comparing latency against a call makes little sense unless the node
2377 // is register pressure-neutral.
2378 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2379 return (left->NodeQueueId > right->NodeQueueId);
2380
2381 // Do not compare latencies when one or both of the nodes are calls.
2382 if (!DisableSchedCycles && 2383 !(left->isCall || right->isCall)) { 2384 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 2385 if (result != 0) 2386 return result > 0; 2387 } 2388 else { 2389 if (left->getHeight() != right->getHeight()) { 2390 DEBUG(++FactorCount[FactHeight]); 2391 return left->getHeight() > right->getHeight(); 2392 } 2393 2394 if (left->getDepth() != right->getDepth()) { 2395 DEBUG(++FactorCount[FactDepth]); 2396 return left->getDepth() < right->getDepth(); 2397 } 2398 } 2399 2400 assert(left->NodeQueueId && right->NodeQueueId && 2401 "NodeQueueId cannot be zero"); 2402 DEBUG(++FactorCount[FactOther]); 2403 return (left->NodeQueueId > right->NodeQueueId); 2404 } 2405 2406 // Bottom up 2407 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2408 if (int res = checkSpecialNodes(left, right)) 2409 return res > 0; 2410 2411 return BURRSort(left, right, SPQ); 2412 } 2413 2414 // Source order, otherwise bottom up. 2415 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2416 if (int res = checkSpecialNodes(left, right)) 2417 return res > 0; 2418 2419 unsigned LOrder = SPQ->getNodeOrdering(left); 2420 unsigned ROrder = SPQ->getNodeOrdering(right); 2421 2422 // Prefer an ordering where the lower the non-zero order number, the higher 2423 // the preference. 2424 if ((LOrder || ROrder) && LOrder != ROrder) 2425 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2426 2427 return BURRSort(left, right, SPQ); 2428 } 2429 2430 // If the time between now and when the instruction will be ready can cover 2431 // the spill code, then avoid adding it to the ready queue. This gives long 2432 // stalls highest priority and allows hoisting across calls. It should also 2433 // speed up processing the available queue. 2434 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2435 static const unsigned ReadyDelay = 3; 2436 2437 if (SPQ->MayReduceRegPressure(SU)) return true; 2438 2439 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 2440 2441 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 2442 != ScheduleHazardRecognizer::NoHazard) 2443 return false; 2444 2445 return true; 2446 } 2447 2448 // Return true if right should be scheduled with higher priority than left. 2449 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2450 if (int res = checkSpecialNodes(left, right)) 2451 return res > 0; 2452 2453 if (left->isCall || right->isCall) 2454 // No way to compute latency of calls. 2455 return BURRSort(left, right, SPQ); 2456 2457 bool LHigh = SPQ->HighRegPressure(left); 2458 bool RHigh = SPQ->HighRegPressure(right); 2459 // Avoid causing spills. If register pressure is high, schedule for 2460 // register pressure reduction. 2461 if (LHigh && !RHigh) { 2462 DEBUG(++FactorCount[FactPressureDiff]); 2463 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 2464 << right->NodeNum << ")\n"); 2465 return true; 2466 } 2467 else if (!LHigh && RHigh) { 2468 DEBUG(++FactorCount[FactPressureDiff]); 2469 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 2470 << left->NodeNum << ")\n"); 2471 return false; 2472 } 2473 if (!LHigh && !RHigh) { 2474 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 2475 if (result != 0) 2476 return result > 0; 2477 } 2478 return BURRSort(left, right, SPQ); 2479 } 2480 2481 // Schedule as many instructions in each cycle as possible. So don't make an 2482 // instruction available unless it is ready in the current cycle. 
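// For example, at CurCycle 5 an SU with height 7 is not yet ready; it is
// first considered at cycle 7, and then only if the hazard recognizer
// reports NoHazard for it.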
2483 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2484 if (SU->getHeight() > CurCycle) return false; 2485 2486 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2487 != ScheduleHazardRecognizer::NoHazard) 2488 return false; 2489 2490 return true; 2491 } 2492 2493 static bool canEnableCoalescing(SUnit *SU) { 2494 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2495 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2496 // CopyToReg should be close to its uses to facilitate coalescing and 2497 // avoid spilling. 2498 return true; 2499 2500 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2501 Opc == TargetOpcode::SUBREG_TO_REG || 2502 Opc == TargetOpcode::INSERT_SUBREG) 2503 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2504 // close to their uses to facilitate coalescing. 2505 return true; 2506 2507 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2508 // If SU does not have a register def, schedule it close to its uses 2509 // because it does not lengthen any live ranges. 2510 return true; 2511 2512 return false; 2513 } 2514 2515 // list-ilp is currently an experimental scheduler that allows various 2516 // heuristics to be enabled prior to the normal register reduction logic. 2517 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2518 if (int res = checkSpecialNodes(left, right)) 2519 return res > 0; 2520 2521 if (left->isCall || right->isCall) 2522 // No way to compute latency of calls. 2523 return BURRSort(left, right, SPQ); 2524 2525 unsigned LLiveUses = 0, RLiveUses = 0; 2526 int LPDiff = 0, RPDiff = 0; 2527 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 2528 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 2529 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 2530 } 2531 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 2532 DEBUG(++FactorCount[FactPressureDiff]); 2533 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff 2534 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); 2535 return LPDiff > RPDiff; 2536 } 2537 2538 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 2539 bool LReduce = canEnableCoalescing(left); 2540 bool RReduce = canEnableCoalescing(right); 2541 DEBUG(if (LReduce != RReduce) ++FactorCount[FactPressureDiff]); 2542 if (LReduce && !RReduce) return false; 2543 if (RReduce && !LReduce) return true; 2544 } 2545 2546 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 2547 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 2548 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); 2549 DEBUG(++FactorCount[FactRegUses]); 2550 return LLiveUses < RLiveUses; 2551 } 2552 2553 if (!DisableSchedStalls) { 2554 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 2555 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 2556 if (LStall != RStall) { 2557 DEBUG(++FactorCount[FactHeight]); 2558 return left->getHeight() > right->getHeight(); 2559 } 2560 } 2561 2562 if (!DisableSchedCriticalPath) { 2563 int spread = (int)left->getDepth() - (int)right->getDepth(); 2564 if (std::abs(spread) > MaxReorderWindow) { 2565 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 2566 << left->getDepth() << " != SU(" << right->NodeNum << "): " 2567 << right->getDepth() << "\n"); 2568 DEBUG(++FactorCount[FactDepth]); 2569 return left->getDepth() < right->getDepth(); 2570 } 2571 } 2572 2573 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 2574 int spread = (int)left->getHeight() - (int)right->getHeight(); 2575 if 
(std::abs(spread) > MaxReorderWindow) { 2576 DEBUG(++FactorCount[FactHeight]); 2577 return left->getHeight() > right->getHeight(); 2578 } 2579 } 2580 2581 return BURRSort(left, right, SPQ); 2582 } 2583 2584 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 2585 SUnits = &sunits; 2586 // Add pseudo dependency edges for two-address nodes. 2587 AddPseudoTwoAddrDeps(); 2588 // Reroute edges to nodes with multiple uses. 2589 if (!TracksRegPressure) 2590 PrescheduleNodesWithMultipleUses(); 2591 // Calculate node priorities. 2592 CalculateSethiUllmanNumbers(); 2593 2594 // For single block loops, mark nodes that look like canonical IV increments. 2595 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) { 2596 for (unsigned i = 0, e = sunits.size(); i != e; ++i) { 2597 initVRegCycle(&sunits[i]); 2598 } 2599 } 2600 } 2601 2602 //===----------------------------------------------------------------------===// 2603 // Preschedule for Register Pressure 2604 //===----------------------------------------------------------------------===// 2605 2606 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) { 2607 if (SU->isTwoAddress) { 2608 unsigned Opc = SU->getNode()->getMachineOpcode(); 2609 const MCInstrDesc &MCID = TII->get(Opc); 2610 unsigned NumRes = MCID.getNumDefs(); 2611 unsigned NumOps = MCID.getNumOperands() - NumRes; 2612 for (unsigned i = 0; i != NumOps; ++i) { 2613 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) { 2614 SDNode *DU = SU->getNode()->getOperand(i).getNode(); 2615 if (DU->getNodeId() != -1 && 2616 Op->OrigNode == &(*SUnits)[DU->getNodeId()]) 2617 return true; 2618 } 2619 } 2620 } 2621 return false; 2622 } 2623 2624 /// canClobberReachingPhysRegUse - True if SU would clobber one of it's 2625 /// successor's explicit physregs whose definition can reach DepSU. 2626 /// i.e. DepSU should not be scheduled above SU. 2627 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, 2628 ScheduleDAGRRList *scheduleDAG, 2629 const TargetInstrInfo *TII, 2630 const TargetRegisterInfo *TRI) { 2631 const unsigned *ImpDefs 2632 = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); 2633 if(!ImpDefs) 2634 return false; 2635 2636 for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end(); 2637 SI != SE; ++SI) { 2638 SUnit *SuccSU = SI->getSUnit(); 2639 for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(), 2640 PE = SuccSU->Preds.end(); PI != PE; ++PI) { 2641 if (!PI->isAssignedRegDep()) 2642 continue; 2643 2644 for (const unsigned *ImpDef = ImpDefs; *ImpDef; ++ImpDef) { 2645 // Return true if SU clobbers this physical register use and the 2646 // definition of the register reaches from DepSU. IsReachable queries a 2647 // topological forward sort of the DAG (following the successors). 2648 if (TRI->regsOverlap(*ImpDef, PI->getReg()) && 2649 scheduleDAG->IsReachable(DepSU, PI->getSUnit())) 2650 return true; 2651 } 2652 } 2653 } 2654 return false; 2655 } 2656 2657 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's 2658 /// physical register defs. 
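/// e.g. on X86, SuccSU implicitly defines EFLAGS and that flags value is
/// actually used, while SU (or a node glued to it) also clobbers EFLAGS;
/// scheduling SU between SuccSU and the flags use would destroy the value.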
2659 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 2660 const TargetInstrInfo *TII, 2661 const TargetRegisterInfo *TRI) { 2662 SDNode *N = SuccSU->getNode(); 2663 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2664 const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 2665 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 2666 for (const SDNode *SUNode = SU->getNode(); SUNode; 2667 SUNode = SUNode->getGluedNode()) { 2668 if (!SUNode->isMachineOpcode()) 2669 continue; 2670 const unsigned *SUImpDefs = 2671 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 2672 if (!SUImpDefs) 2673 return false; 2674 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2675 EVT VT = N->getValueType(i); 2676 if (VT == MVT::Glue || VT == MVT::Other) 2677 continue; 2678 if (!N->hasAnyUseOfValue(i)) 2679 continue; 2680 unsigned Reg = ImpDefs[i - NumDefs]; 2681 for (;*SUImpDefs; ++SUImpDefs) { 2682 unsigned SUReg = *SUImpDefs; 2683 if (TRI->regsOverlap(Reg, SUReg)) 2684 return true; 2685 } 2686 } 2687 } 2688 return false; 2689 } 2690 2691 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 2692 /// are not handled well by the general register pressure reduction 2693 /// heuristics. When presented with code like this: 2694 /// 2695 /// N 2696 /// / | 2697 /// / | 2698 /// U store 2699 /// | 2700 /// ... 2701 /// 2702 /// the heuristics tend to push the store up, but since the 2703 /// operand of the store has another use (U), this would increase 2704 /// the length of that other use (the U->N edge). 2705 /// 2706 /// This function transforms code like the above to route U's 2707 /// dependence through the store when possible, like this: 2708 /// 2709 /// N 2710 /// || 2711 /// || 2712 /// store 2713 /// | 2714 /// U 2715 /// | 2716 /// ... 2717 /// 2718 /// This results in the store being scheduled immediately 2719 /// after N, which shortens the U->N live range, reducing 2720 /// register pressure. 2721 /// 2722 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 2723 // Visit all the nodes in topological order, working top-down. 2724 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 2725 SUnit *SU = &(*SUnits)[i]; 2726 // For now, only look at nodes with no data successors, such as stores. 2727 // These are especially important, due to the heuristics in 2728 // getNodePriority for nodes with no data successors. 2729 if (SU->NumSuccs != 0) 2730 continue; 2731 // For now, only look at nodes with exactly one data predecessor. 2732 if (SU->NumPreds != 1) 2733 continue; 2734 // Avoid prescheduling copies to virtual registers, which don't behave 2735 // like other nodes from the perspective of scheduling heuristics. 2736 if (SDNode *N = SU->getNode()) 2737 if (N->getOpcode() == ISD::CopyToReg && 2738 TargetRegisterInfo::isVirtualRegister 2739 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 2740 continue; 2741 2742 // Locate the single data predecessor. 2743 SUnit *PredSU = 0; 2744 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 2745 EE = SU->Preds.end(); II != EE; ++II) 2746 if (!II->isCtrl()) { 2747 PredSU = II->getSUnit(); 2748 break; 2749 } 2750 assert(PredSU); 2751 2752 // Don't rewrite edges that carry physregs, because that requires additional 2753 // support infrastructure. 2754 if (PredSU->hasPhysRegDefs) 2755 continue; 2756 // Short-circuit the case where SU is PredSU's only data successor. 
2757 if (PredSU->NumSuccs == 1)
2758 continue;
2759 // Avoid prescheduling to copies from virtual registers, which don't behave
2760 // like other nodes from the perspective of scheduling heuristics.
2761 if (SDNode *N = SU->getNode())
2762 if (N->getOpcode() == ISD::CopyFromReg &&
2763 TargetRegisterInfo::isVirtualRegister
2764 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2765 continue;
2766
2767 // Perform checks on the successors of PredSU.
2768 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2769 EE = PredSU->Succs.end(); II != EE; ++II) {
2770 SUnit *PredSuccSU = II->getSUnit();
2771 if (PredSuccSU == SU) continue;
2772 // If PredSU has another successor with no data successors, for
2773 // now don't attempt to choose either over the other.
2774 if (PredSuccSU->NumSuccs == 0)
2775 goto outer_loop_continue;
2776 // Don't break physical register dependencies.
2777 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2778 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2779 goto outer_loop_continue;
2780 // Don't introduce graph cycles.
2781 if (scheduleDAG->IsReachable(SU, PredSuccSU))
2782 goto outer_loop_continue;
2783 }
2784
2785 // Ok, the transformation is safe and the heuristics suggest it is
2786 // profitable. Update the graph.
2787 DEBUG(dbgs() << " Prescheduling SU #" << SU->NodeNum
2788 << " next to PredSU #" << PredSU->NodeNum
2789 << " to guide scheduling in the presence of multiple uses\n");
2790 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2791 SDep Edge = PredSU->Succs[i];
2792 assert(!Edge.isAssignedRegDep());
2793 SUnit *SuccSU = Edge.getSUnit();
2794 if (SuccSU != SU) {
2795 Edge.setSUnit(PredSU);
2796 scheduleDAG->RemovePred(SuccSU, Edge);
2797 scheduleDAG->AddPred(SU, Edge);
2798 Edge.setSUnit(SU);
2799 scheduleDAG->AddPred(SuccSU, Edge);
2800 --i;
2801 }
2802 }
2803 outer_loop_continue:;
2804 }
2805 }
2806
2807 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2808 /// it as a def&use operand, add a pseudo control edge from it to the other
2809 /// node (if it won't create a cycle) so the two-address one will be scheduled
2810 /// first (lower in the schedule). If both nodes are two-address, favor the
2811 /// one that has a CopyToReg use (more likely to be a loop induction update).
2812 /// If both are two-address, but one is commutable while the other is not
2813 /// commutable, favor the one that's not commutable.
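///
/// For example, if t2 = op t1, c1 is two-address (t2 tied to t1) and another
/// node u also reads t1, the artificial edge forces u to be scheduled above
/// the two-address node, making the op the last use of t1 so its tied def
/// and use can be coalesced without inserting a copy.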
2814 void RegReductionPQBase::AddPseudoTwoAddrDeps() { 2815 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 2816 SUnit *SU = &(*SUnits)[i]; 2817 if (!SU->isTwoAddress) 2818 continue; 2819 2820 SDNode *Node = SU->getNode(); 2821 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode()) 2822 continue; 2823 2824 bool isLiveOut = hasOnlyLiveOutUses(SU); 2825 unsigned Opc = Node->getMachineOpcode(); 2826 const MCInstrDesc &MCID = TII->get(Opc); 2827 unsigned NumRes = MCID.getNumDefs(); 2828 unsigned NumOps = MCID.getNumOperands() - NumRes; 2829 for (unsigned j = 0; j != NumOps; ++j) { 2830 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1) 2831 continue; 2832 SDNode *DU = SU->getNode()->getOperand(j).getNode(); 2833 if (DU->getNodeId() == -1) 2834 continue; 2835 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()]; 2836 if (!DUSU) continue; 2837 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(), 2838 E = DUSU->Succs.end(); I != E; ++I) { 2839 if (I->isCtrl()) continue; 2840 SUnit *SuccSU = I->getSUnit(); 2841 if (SuccSU == SU) 2842 continue; 2843 // Be conservative. Ignore if nodes aren't at roughly the same 2844 // depth and height. 2845 if (SuccSU->getHeight() < SU->getHeight() && 2846 (SU->getHeight() - SuccSU->getHeight()) > 1) 2847 continue; 2848 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge 2849 // constrains whatever is using the copy, instead of the copy 2850 // itself. In the case that the copy is coalesced, this 2851 // preserves the intent of the pseudo two-address heurietics. 2852 while (SuccSU->Succs.size() == 1 && 2853 SuccSU->getNode()->isMachineOpcode() && 2854 SuccSU->getNode()->getMachineOpcode() == 2855 TargetOpcode::COPY_TO_REGCLASS) 2856 SuccSU = SuccSU->Succs.front().getSUnit(); 2857 // Don't constrain non-instruction nodes. 2858 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode()) 2859 continue; 2860 // Don't constrain nodes with physical register defs if the 2861 // predecessor can clobber them. 2862 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) { 2863 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI)) 2864 continue; 2865 } 2866 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG; 2867 // these may be coalesced away. We want them close to their uses. 2868 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 2869 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 2870 SuccOpc == TargetOpcode::INSERT_SUBREG || 2871 SuccOpc == TargetOpcode::SUBREG_TO_REG) 2872 continue; 2873 if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && 2874 (!canClobber(SuccSU, DUSU) || 2875 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 2876 (!SU->isCommutable && SuccSU->isCommutable)) && 2877 !scheduleDAG->IsReachable(SuccSU, SU)) { 2878 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 2879 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 2880 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 2881 /*Reg=*/0, /*isNormalMemory=*/false, 2882 /*isMustAlias=*/false, 2883 /*isArtificial=*/true)); 2884 } 2885 } 2886 } 2887 } 2888 } 2889 2890 /// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled 2891 /// predecessors of the successors of the SUnit SU. Stop when the provided 2892 /// limit is exceeded. 
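/// The top-down heuristic below only calls this with Limit==1, i.e. to test
/// whether SU has a single successor whose only unscheduled predecessor is
/// SU itself, in which case scheduling SU immediately makes that successor
/// ready.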
2893 static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, 2894 unsigned Limit) { 2895 unsigned Sum = 0; 2896 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2897 I != E; ++I) { 2898 const SUnit *SuccSU = I->getSUnit(); 2899 for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), 2900 EE = SuccSU->Preds.end(); II != EE; ++II) { 2901 SUnit *PredSU = II->getSUnit(); 2902 if (!PredSU->isScheduled) 2903 if (++Sum > Limit) 2904 return Sum; 2905 } 2906 } 2907 return Sum; 2908 } 2909 2910 2911 // Top down 2912 bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { 2913 if (int res = checkSpecialNodes(left, right)) 2914 return res < 0; 2915 2916 unsigned LPriority = SPQ->getNodePriority(left); 2917 unsigned RPriority = SPQ->getNodePriority(right); 2918 bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); 2919 bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); 2920 bool LIsFloater = LIsTarget && left->NumPreds == 0; 2921 bool RIsFloater = RIsTarget && right->NumPreds == 0; 2922 unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; 2923 unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 2 : 0; 2924 2925 if (left->NumSuccs == 0 && right->NumSuccs != 0) 2926 return false; 2927 else if (left->NumSuccs != 0 && right->NumSuccs == 0) 2928 return true; 2929 2930 if (LIsFloater) 2931 LBonus -= 2; 2932 if (RIsFloater) 2933 RBonus -= 2; 2934 if (left->NumSuccs == 1) 2935 LBonus += 2; 2936 if (right->NumSuccs == 1) 2937 RBonus += 2; 2938 2939 if (LPriority+LBonus != RPriority+RBonus) 2940 return LPriority+LBonus < RPriority+RBonus; 2941 2942 if (left->getDepth() != right->getDepth()) 2943 return left->getDepth() < right->getDepth(); 2944 2945 if (left->NumSuccsLeft != right->NumSuccsLeft) 2946 return left->NumSuccsLeft > right->NumSuccsLeft; 2947 2948 assert(left->NodeQueueId && right->NodeQueueId && 2949 "NodeQueueId cannot be zero"); 2950 return (left->NodeQueueId > right->NodeQueueId); 2951 } 2952 2953 //===----------------------------------------------------------------------===// 2954 // Public Constructor Functions 2955 //===----------------------------------------------------------------------===// 2956 2957 llvm::ScheduleDAGSDNodes * 2958 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 2959 CodeGenOpt::Level OptLevel) { 2960 const TargetMachine &TM = IS->TM; 2961 const TargetInstrInfo *TII = TM.getInstrInfo(); 2962 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2963 2964 BURegReductionPriorityQueue *PQ = 2965 new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2966 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2967 PQ->setScheduleDAG(SD); 2968 return SD; 2969 } 2970 2971 llvm::ScheduleDAGSDNodes * 2972 llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, 2973 CodeGenOpt::Level OptLevel) { 2974 const TargetMachine &TM = IS->TM; 2975 const TargetInstrInfo *TII = TM.getInstrInfo(); 2976 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2977 2978 TDRegReductionPriorityQueue *PQ = 2979 new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); 2980 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2981 PQ->setScheduleDAG(SD); 2982 return SD; 2983 } 2984 2985 llvm::ScheduleDAGSDNodes * 2986 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 2987 CodeGenOpt::Level OptLevel) { 2988 const TargetMachine &TM = IS->TM; 2989 const TargetInstrInfo *TII = 
TM.getInstrInfo();
2990 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
2991
2992 SrcRegReductionPriorityQueue *PQ =
2993 new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
2994 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
2995 PQ->setScheduleDAG(SD);
2996 return SD;
2997 }
2998
2999 llvm::ScheduleDAGSDNodes *
3000 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
3001 CodeGenOpt::Level OptLevel) {
3002 const TargetMachine &TM = IS->TM;
3003 const TargetInstrInfo *TII = TM.getInstrInfo();
3004 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
3005 const TargetLowering *TLI = &IS->getTargetLowering();
3006
3007 HybridBURRPriorityQueue *PQ =
3008 new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
3009
3010 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3011 PQ->setScheduleDAG(SD);
3012 return SD;
3013 }
3014
3015 llvm::ScheduleDAGSDNodes *
3016 llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
3017 CodeGenOpt::Level OptLevel) {
3018 const TargetMachine &TM = IS->TM;
3019 const TargetInstrInfo *TII = TM.getInstrInfo();
3020 const TargetRegisterInfo *TRI = TM.getRegisterInfo();
3021 const TargetLowering *TLI = &IS->getTargetLowering();
3022
3023 ILPBURRPriorityQueue *PQ =
3024 new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
3025 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
3026 PQ->setScheduleDAG(SD);
3027 return SD;
3028 }
3029
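// Note: these factory functions are what the scheduler registry entries near
// the top of this file refer to; a particular list scheduler is typically
// selected with the -pre-RA-sched=<name> command line option.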