//===----- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements a bottom-up register pressure reduction list scheduler,
// using standard algorithms. The basic approach uses a priority queue of
// available nodes to schedule. One at a time, nodes are taken from the
// priority queue (thus in priority order), checked for legality to schedule,
// and emitted if legal.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "pre-RA-sched"
#include "ScheduleDAGSDNodes.h"
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;

STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumPRCopies, "Number of physical register copies");

static RegisterScheduler
  burrListDAGScheduler("list-burr",
                       "Bottom-up register reduction list scheduling",
                       createBURRListDAGScheduler);
static RegisterScheduler
  sourceListDAGScheduler("source",
                         "Similar to list-burr but schedules in source "
                         "order when possible",
                         createSourceListDAGScheduler);

static RegisterScheduler
  hybridListDAGScheduler("list-hybrid",
                         "Bottom-up register pressure aware list scheduling "
                         "which tries to balance latency and register pressure",
                         createHybridListDAGScheduler);

static RegisterScheduler
  ILPListDAGScheduler("list-ilp",
                      "Bottom-up register pressure aware list scheduling "
                      "which tries to balance ILP and register pressure",
                      createILPListDAGScheduler);

static cl::opt<bool> DisableSchedCycles(
  "disable-sched-cycles", cl::Hidden, cl::init(false),
  cl::desc("Disable cycle-level precision during preRA scheduling"));

// Temporary sched=list-ilp flags until the heuristics are robust.
// Some options are also available under sched=list-hybrid.
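// These are ordinary cl::opt flags, so they can be toggled from the command
// line; an illustrative (not exhaustive) invocation might look like:
//   llc -pre-RA-sched=list-ilp -disable-sched-reg-pressure foo.ll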
static cl::opt<bool> DisableSchedRegPressure(
  "disable-sched-reg-pressure", cl::Hidden, cl::init(false),
  cl::desc("Disable regpressure priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedLiveUses(
  "disable-sched-live-uses", cl::Hidden, cl::init(true),
  cl::desc("Disable live use priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedVRegCycle(
  "disable-sched-vrcycle", cl::Hidden, cl::init(false),
  cl::desc("Disable virtual register cycle interference checks"));
static cl::opt<bool> DisableSchedPhysRegJoin(
  "disable-sched-physreg-join", cl::Hidden, cl::init(false),
  cl::desc("Disable physreg def-use affinity"));
static cl::opt<bool> DisableSchedStalls(
  "disable-sched-stalls", cl::Hidden, cl::init(true),
  cl::desc("Disable no-stall priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedCriticalPath(
  "disable-sched-critical-path", cl::Hidden, cl::init(false),
  cl::desc("Disable critical path priority in sched=list-ilp"));
static cl::opt<bool> DisableSchedHeight(
  "disable-sched-height", cl::Hidden, cl::init(false),
  cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
  "disable-2addr-hack", cl::Hidden, cl::init(true),
  cl::desc("Disable scheduler's two-address hack"));

static cl::opt<int> MaxReorderWindow(
  "max-sched-reorder", cl::Hidden, cl::init(6),
  cl::desc("Number of instructions to allow ahead of the critical path "
           "in sched=list-ilp"));

static cl::opt<unsigned> AvgIPC(
  "sched-avg-ipc", cl::Hidden, cl::init(1),
  cl::desc("Average inst/cycle when no target itinerary exists."));

namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
/// implementation. This supports bottom-up scheduling.
///
class ScheduleDAGRRList : public ScheduleDAGSDNodes {
private:
  /// NeedLatency - True if the scheduler will make use of latency information.
  ///
  bool NeedLatency;

  /// AvailableQueue - The priority queue to use for the available SUnits.
  SchedulingPriorityQueue *AvailableQueue;

  /// PendingQueue - This contains all of the instructions whose operands have
  /// been issued, but their results are not ready yet (due to the latency of
  /// the operation). Once the operands become available, the instruction is
  /// added to the AvailableQueue.
  std::vector<SUnit*> PendingQueue;

  /// HazardRec - The hazard recognizer to use.
  ScheduleHazardRecognizer *HazardRec;

  /// CurCycle - The current scheduler state corresponds to this cycle.
  unsigned CurCycle;

  /// MinAvailableCycle - Cycle of the soonest available instruction.
  unsigned MinAvailableCycle;

  /// IssueCount - Count instructions issued in this cycle.
  /// Currently valid only for bottom-up scheduling.
  unsigned IssueCount;

  /// LiveRegDefs - A set of physical registers and their definitions that are
  /// "live". These nodes must be scheduled before any other nodes that
  /// modify the registers can be scheduled.
  unsigned NumLiveRegs;
  std::vector<SUnit*> LiveRegDefs;
  std::vector<SUnit*> LiveRegGens;

  /// Topo - A topological ordering for SUnits which permits fast IsReachable
  /// and similar queries.
  ScheduleDAGTopologicalSort Topo;

  // Hack to keep track of the inverse of FindCallSeqStart without more crazy
  // DAG crawling.
  DenseMap<SUnit*, SUnit*> CallSeqEndForStart;

public:
  ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
                    SchedulingPriorityQueue *availqueue,
                    CodeGenOpt::Level OptLevel)
    : ScheduleDAGSDNodes(mf),
      NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
      Topo(SUnits) {

    const TargetMachine &tm = mf.getTarget();
    if (DisableSchedCycles || !NeedLatency)
      HazardRec = new ScheduleHazardRecognizer();
    else
      HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
  }

  ~ScheduleDAGRRList() {
    delete HazardRec;
    delete AvailableQueue;
  }

  void Schedule();

  ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }

  /// IsReachable - Checks if SU is reachable from TargetSU.
  bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
    return Topo.IsReachable(SU, TargetSU);
  }

  /// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will
  /// create a cycle.
  bool WillCreateCycle(SUnit *SU, SUnit *TargetSU) {
    return Topo.WillCreateCycle(SU, TargetSU);
  }

  /// AddPred - adds a predecessor edge to SUnit SU.
  /// Updates the topological ordering if required.
  void AddPred(SUnit *SU, const SDep &D) {
    Topo.AddPred(SU, D.getSUnit());
    SU->addPred(D);
  }

  /// RemovePred - removes a predecessor edge from SUnit SU.
  /// Updates the topological ordering if required.
  void RemovePred(SUnit *SU, const SDep &D) {
    Topo.RemovePred(SU, D.getSUnit());
    SU->removePred(D);
  }

private:
  bool isReady(SUnit *SU) {
    return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
      AvailableQueue->isReady(SU);
  }

  void ReleasePred(SUnit *SU, const SDep *PredEdge);
  void ReleasePredecessors(SUnit *SU);
  void ReleasePending();
  void AdvanceToCycle(unsigned NextCycle);
  void AdvancePastStalls(SUnit *SU);
  void EmitNode(SUnit *SU);
  void ScheduleNodeBottomUp(SUnit*);
  void CapturePred(SDep *PredEdge);
  void UnscheduleNodeBottomUp(SUnit*);
  void RestoreHazardCheckerBottomUp();
  void BacktrackBottomUp(SUnit*, SUnit*);
  SUnit *CopyAndMoveSuccessors(SUnit*);
  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
                                const TargetRegisterClass*,
                                const TargetRegisterClass*,
                                SmallVector<SUnit*, 2>&);
  bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);

  SUnit *PickNodeToScheduleBottomUp();
  void ListScheduleBottomUp();

  /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
  /// Updates the topological ordering if required.
  SUnit *CreateNewSUnit(SDNode *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = newSUnit(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// CreateClone - Creates a new SUnit from an existing one.
  /// Updates the topological ordering if required.
  SUnit *CreateClone(SUnit *N) {
    unsigned NumSUnits = SUnits.size();
    SUnit *NewNode = Clone(N);
    // Update the topological ordering.
    if (NewNode->NodeNum >= NumSUnits)
      Topo.InitDAGTopologicalSorting();
    return NewNode;
  }

  /// forceUnitLatencies - Register-pressure-reducing scheduling doesn't
  /// need actual latency information but the hybrid scheduler does.
  bool forceUnitLatencies() const {
    return !NeedLatency;
  }
};
} // end anonymous namespace

/// GetCostForDef - Looks up the register class and cost for a given definition.
/// Typically this just means looking up the representative register class,
/// but for untyped values (MVT::Untyped) it means inspecting the node's
/// opcode to determine what register class is being generated.
static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
                          const TargetLowering *TLI,
                          const TargetInstrInfo *TII,
                          const TargetRegisterInfo *TRI,
                          unsigned &RegClass, unsigned &Cost) {
  EVT VT = RegDefPos.GetValue();

  // Special handling for untyped values. These values can only come from
  // the expansion of custom DAG-to-DAG patterns.
  if (VT == MVT::Untyped) {
    const SDNode *Node = RegDefPos.GetNode();
    unsigned Opcode = Node->getMachineOpcode();

    if (Opcode == TargetOpcode::REG_SEQUENCE) {
      unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
      const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
      RegClass = RC->getID();
      Cost = 1;
      return;
    }

    unsigned Idx = RegDefPos.GetIdx();
    const MCInstrDesc &Desc = TII->get(Opcode);
    const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
    RegClass = RC->getID();
    // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
    // better way to determine it.
    Cost = 1;
  } else {
    RegClass = TLI->getRepRegClassFor(VT)->getID();
    Cost = TLI->getRepRegClassCostFor(VT);
  }
}

/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
  DEBUG(dbgs()
        << "********** List Scheduling BB#" << BB->getNumber()
        << " '" << BB->getName() << "' **********\n");

  CurCycle = 0;
  IssueCount = 0;
  MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
  NumLiveRegs = 0;
  // Allocate slots for each physical register, plus one for a special register
  // to track the virtual resource of a calling sequence.
  LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL);
  LiveRegGens.resize(TRI->getNumRegs() + 1, NULL);
  CallSeqEndForStart.clear();

  // Build the scheduling graph.
  BuildSchedGraph(NULL);

  DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
          SUnits[su].dumpAll(this));
  Topo.InitDAGTopologicalSorting();

  AvailableQueue->initNodes(SUnits);

  HazardRec->Reset();

  // Execute the actual scheduling loop.
  ListScheduleBottomUp();

  AvailableQueue->releaseState();

  DEBUG({
    dbgs() << "*** Final schedule ***\n";
    dumpSchedule();
    dbgs() << '\n';
  });
}

//===----------------------------------------------------------------------===//
//  Bottom-Up Scheduling
//===----------------------------------------------------------------------===//

/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. Add it to
/// the AvailableQueue if the count reaches zero. Also update its cycle bound.
void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();

#ifndef NDEBUG
  if (PredSU->NumSuccsLeft == 0) {
    dbgs() << "*** Scheduling failed! ***\n";
    PredSU->dump(this);
    dbgs() << " has been released too many times!\n";
    llvm_unreachable(0);
  }
#endif
  --PredSU->NumSuccsLeft;

  if (!forceUnitLatencies()) {
    // Updating predecessor's height. This is now the cycle when the
    // predecessor can be scheduled without causing a pipeline stall.
    PredSU->setHeightToAtLeast(SU->getHeight() + PredEdge->getLatency());
  }

  // If all the node's successors are scheduled, this node is ready
  // to be scheduled. Ignore the special EntrySU node.
  if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
    PredSU->isAvailable = true;

    unsigned Height = PredSU->getHeight();
    if (Height < MinAvailableCycle)
      MinAvailableCycle = Height;

    if (isReady(PredSU)) {
      AvailableQueue->push(PredSU);
    }
    // CapturePred and others may have left the node in the pending queue;
    // avoid adding it twice.
    else if (!PredSU->isPending) {
      PredSU->isPending = true;
      PendingQueue.push_back(PredSU);
    }
  }
}

/// IsChainDependent - Test if Outer is reachable from Inner through
/// chain dependencies.
static bool IsChainDependent(SDNode *Outer, SDNode *Inner,
                             unsigned NestLevel,
                             const TargetInstrInfo *TII) {
  SDNode *N = Outer;
  for (;;) {
    if (N == Inner)
      return true;
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII))
          return true;
      return false;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        if (NestLevel == 0)
          return false;
        --NestLevel;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return false;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return false;
  }
}

/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate
/// the corresponding (lowered) CALLSEQ_BEGIN node.
///
/// NestLevel and MaxNest are used in recursion to indicate the current level
/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum
/// level seen so far.
///
/// TODO: It would be better to give CALLSEQ_END an explicit operand to point
/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it.
static SDNode *
FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest,
                 const TargetInstrInfo *TII) {
  for (;;) {
    // For a TokenFactor, examine each operand. There may be multiple ways
    // to get to the CALLSEQ_BEGIN, but we need to find the path with the
    // most nesting in order to ensure that we find the corresponding match.
    if (N->getOpcode() == ISD::TokenFactor) {
      SDNode *Best = 0;
      unsigned BestMaxNest = MaxNest;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
        unsigned MyNestLevel = NestLevel;
        unsigned MyMaxNest = MaxNest;
        if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(),
                                           MyNestLevel, MyMaxNest, TII))
          if (!Best || (MyMaxNest > BestMaxNest)) {
            Best = New;
            BestMaxNest = MyMaxNest;
          }
      }
      assert(Best);
      MaxNest = BestMaxNest;
      return Best;
    }
    // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
    if (N->isMachineOpcode()) {
      if (N->getMachineOpcode() ==
          (unsigned)TII->getCallFrameDestroyOpcode()) {
        ++NestLevel;
        MaxNest = std::max(MaxNest, NestLevel);
      } else if (N->getMachineOpcode() ==
                 (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NestLevel != 0);
        --NestLevel;
        if (NestLevel == 0)
          return N;
      }
    }
    // Otherwise, find the chain and continue climbing.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other) {
        N = N->getOperand(i).getNode();
        goto found_chain_operand;
      }
    return 0;
  found_chain_operand:;
    if (N->getOpcode() == ISD::EntryToken)
      return 0;
  }
}

/// Call ReleasePred for each predecessor, then update register live def/gen.
/// Always update LiveRegDefs for a register dependence even if the current SU
/// also defines the register. This effectively creates one large live range
/// across a sequence of two-address nodes. This is important because the
/// entire chain must be scheduled together. Example:
///
/// flags = (3) add
/// flags = (2) addc flags
/// flags = (1) addc flags
///
/// results in
///
/// LiveRegDefs[flags] = 3
/// LiveRegGens[flags] = 1
///
/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
/// interference on flags.
void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
  // Bottom up: release predecessors
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    ReleasePred(SU, &*I);
    if (I->isAssignedRegDep()) {
      // This is a physical register dependency and it's impossible or
      // expensive to copy the register. Make sure nothing that can
      // clobber the register is scheduled between the predecessor and
      // this node.
      SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
      assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
             "interference on register dependence");
      LiveRegDefs[I->getReg()] = I->getSUnit();
      if (!LiveRegGens[I->getReg()]) {
        ++NumLiveRegs;
        LiveRegGens[I->getReg()] = SU;
      }
    }
  }

  // If we're scheduling a lowered CALLSEQ_END, find the corresponding
  // CALLSEQ_BEGIN. Inject an artificial physical register dependence between
  // these nodes, to prevent other calls from being interscheduled with them.
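  // The extra slot LiveRegDefs[TRI->getNumRegs()] models the calling sequence
  // as one additional "virtual" register. For example, given two lowered calls
  //   CALLSEQ_BEGIN(a) ... CALLSEQ_END(a) and CALLSEQ_BEGIN(b) ... CALLSEQ_END(b)
  // scheduling CALLSEQ_END(b) bottom-up makes CALLSEQ_BEGIN(b) the live def of
  // that resource, so nothing from call (a) can be scheduled in between.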
  unsigned CallResource = TRI->getNumRegs();
  if (!LiveRegDefs[CallResource])
    for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode())
      if (Node->isMachineOpcode() &&
          Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        unsigned NestLevel = 0;
        unsigned MaxNest = 0;
        SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII);

        SUnit *Def = &SUnits[N->getNodeId()];
        CallSeqEndForStart[Def] = SU;

        ++NumLiveRegs;
        LiveRegDefs[CallResource] = Def;
        LiveRegGens[CallResource] = SU;
        break;
      }
}

/// Check to see if any of the pending instructions are ready to issue. If
/// so, add them to the available queue.
void ScheduleDAGRRList::ReleasePending() {
  if (DisableSchedCycles) {
    assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
    return;
  }

  // If the available queue is empty, it is safe to reset MinAvailableCycle.
  if (AvailableQueue->empty())
    MinAvailableCycle = UINT_MAX;

  // Check to see if any of the pending instructions are ready to issue. If
  // so, add them to the available queue.
  for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
    unsigned ReadyCycle = PendingQueue[i]->getHeight();
    if (ReadyCycle < MinAvailableCycle)
      MinAvailableCycle = ReadyCycle;

    if (PendingQueue[i]->isAvailable) {
      if (!isReady(PendingQueue[i]))
        continue;
      AvailableQueue->push(PendingQueue[i]);
    }
    PendingQueue[i]->isPending = false;
    PendingQueue[i] = PendingQueue.back();
    PendingQueue.pop_back();
    --i; --e;
  }
}

/// Move the scheduler state forward by the specified number of Cycles.
void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
  if (NextCycle <= CurCycle)
    return;

  IssueCount = 0;
  AvailableQueue->setCurCycle(NextCycle);
  if (!HazardRec->isEnabled()) {
    // Bypass lots of virtual calls in case of long latency.
    CurCycle = NextCycle;
  }
  else {
    for (; CurCycle != NextCycle; ++CurCycle) {
      HazardRec->RecedeCycle();
    }
  }
  // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
  // available Q to release pending nodes at least once before popping.
  ReleasePending();
}

/// Move the scheduler state forward until the specified node's dependents are
/// ready and can be scheduled with no resource conflicts.
void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
  if (DisableSchedCycles)
    return;

  // FIXME: Nodes such as CopyFromReg probably should not advance the current
  // cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
  // has predecessors the cycle will be advanced when they are scheduled.
  // But given the crude nature of modeling latency through such nodes, we
  // currently need to treat these nodes like real instructions.
  // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;

  unsigned ReadyCycle = SU->getHeight();

  // Bump CurCycle to account for latency. We assume the latency of other
  // available instructions may be hidden by the stall (not a full pipe stall).
  // This updates the hazard recognizer's cycle before reserving resources for
  // this instruction.
  AdvanceToCycle(ReadyCycle);

  // Calls are scheduled in their preceding cycle, so don't conflict with
  // hazards from instructions after the call. EmitNode will reset the
  // scoreboard state before emitting the call.
  if (SU->isCall)
    return;

  // FIXME: For resource conflicts in very long non-pipelined stages, we
  // should probably skip ahead here to avoid useless scoreboard checks.
  int Stalls = 0;
  while (true) {
    ScheduleHazardRecognizer::HazardType HT =
      HazardRec->getHazardType(SU, -Stalls);

    if (HT == ScheduleHazardRecognizer::NoHazard)
      break;

    ++Stalls;
  }
  AdvanceToCycle(CurCycle + Stalls);
}

/// Record this SUnit in the HazardRecognizer.
/// Does not update CurCycle.
void ScheduleDAGRRList::EmitNode(SUnit *SU) {
  if (!HazardRec->isEnabled())
    return;

  // Check for phys reg copy.
  if (!SU->getNode())
    return;

  switch (SU->getNode()->getOpcode()) {
  default:
    assert(SU->getNode()->isMachineOpcode() &&
           "This target-independent node should not be scheduled.");
    break;
  case ISD::MERGE_VALUES:
  case ISD::TokenFactor:
  case ISD::CopyToReg:
  case ISD::CopyFromReg:
  case ISD::EH_LABEL:
    // Noops don't affect the scoreboard state. Copies are likely to be
    // removed.
    return;
  case ISD::INLINEASM:
    // For inline asm, clear the pipeline state.
    HazardRec->Reset();
    return;
  }
  if (SU->isCall) {
    // Calls are scheduled with their preceding instructions. For bottom-up
    // scheduling, clear the pipeline state before emitting.
    HazardRec->Reset();
  }

  HazardRec->EmitInstruction(SU);
}

static void resetVRegCycle(SUnit *SU);

/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
  DEBUG(SU->dump(this));

#ifndef NDEBUG
  if (CurCycle < SU->getHeight())
    DEBUG(dbgs() << " Height [" << SU->getHeight()
          << "] pipeline stall!\n");
#endif

  // FIXME: Do not modify node height. It may interfere with
  // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
  // node its ready cycle can aid heuristics, and after scheduling it can
  // indicate the scheduled cycle.
  SU->setHeightToAtLeast(CurCycle);

  // Reserve resources for the scheduled instruction.
  EmitNode(SU);

  Sequence.push_back(SU);

  AvailableQueue->scheduledNode(SU);

  // If HazardRec is disabled, and each inst counts as one cycle, then
  // advance CurCycle before ReleasePredecessors to avoid useless pushes to
  // PendingQueue for schedulers that implement HasReadyFilter.
  if (!HazardRec->isEnabled() && AvgIPC < 2)
    AdvanceToCycle(CurCycle + 1);

  // Update liveness of predecessors before successors to avoid treating a
  // two-address node as a live range def.
  ReleasePredecessors(SU);

  // Release all the implicit physical register defs that are live.
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
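    // In the flags/addc example above ReleasePredecessors, only scheduling
    // (3) add, the node actually recorded in LiveRegDefs, releases the flags
    // register; the intermediate two-address nodes leave it live.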
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }
  // Release the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  if (LiveRegDefs[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  resetVRegCycle(SU);

  SU->isScheduled = true;

  // Conditions under which the scheduler should eagerly advance the cycle:
  // (1) No available instructions
  // (2) All pipelines full, so available instructions must have hazards.
  //
  // If HazardRec is disabled, the cycle was pre-advanced before calling
  // ReleasePredecessors. In that case, IssueCount should remain 0.
  //
  // Check AvailableQueue after ReleasePredecessors in case of zero latency.
  if (HazardRec->isEnabled() || AvgIPC > 1) {
    if (SU->getNode() && SU->getNode()->isMachineOpcode())
      ++IssueCount;
    if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
        || (!HazardRec->isEnabled() && IssueCount == AvgIPC))
      AdvanceToCycle(CurCycle + 1);
  }
}

/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
  SUnit *PredSU = PredEdge->getSUnit();
  if (PredSU->isAvailable) {
    PredSU->isAvailable = false;
    if (!PredSU->isPending)
      AvailableQueue->remove(PredSU);
  }

  assert(PredSU->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!");
  ++PredSU->NumSuccsLeft;
}

/// UnscheduleNodeBottomUp - Remove the node from the schedule, and update its
/// own and its predecessors' states to reflect the change.
void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
  DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
  DEBUG(SU->dump(this));

  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    CapturePred(&*I);
    if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
      assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
      assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
             "Physical register dependency violated?");
      --NumLiveRegs;
      LiveRegDefs[I->getReg()] = NULL;
      LiveRegGens[I->getReg()] = NULL;
    }
  }

  // Reclaim the special call resource dependence, if this is the beginning
  // of a call.
  unsigned CallResource = TRI->getNumRegs();
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isMachineOpcode() &&
        SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) {
      ++NumLiveRegs;
      LiveRegDefs[CallResource] = SU;
      LiveRegGens[CallResource] = CallSeqEndForStart[SU];
    }
  }

  // Release the special call resource dependence, if this is the end
  // of a call.
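  // This undoes the bookkeeping done in ReleasePredecessors: the
  // CallSeqEndForStart map lets us restore LiveRegGens here without
  // re-crawling the DAG for the matching CALLSEQ_END.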
  if (LiveRegGens[CallResource] == SU)
    for (const SDNode *SUNode = SU->getNode(); SUNode;
         SUNode = SUNode->getGluedNode()) {
      if (SUNode->isMachineOpcode() &&
          SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
        assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
        --NumLiveRegs;
        LiveRegDefs[CallResource] = NULL;
        LiveRegGens[CallResource] = NULL;
      }
    }

  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isAssignedRegDep()) {
      if (!LiveRegDefs[I->getReg()])
        ++NumLiveRegs;
      // This becomes the nearest def. Note that an earlier def may still be
      // pending if this is a two-address node.
      LiveRegDefs[I->getReg()] = SU;
      if (LiveRegGens[I->getReg()] == NULL ||
          I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
        LiveRegGens[I->getReg()] = I->getSUnit();
    }
  }
  if (SU->getHeight() < MinAvailableCycle)
    MinAvailableCycle = SU->getHeight();

  SU->setHeightDirty();
  SU->isScheduled = false;
  SU->isAvailable = true;
  if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
    // Don't make available until backtracking is complete.
    SU->isPending = true;
    PendingQueue.push_back(SU);
  }
  else {
    AvailableQueue->push(SU);
  }
  AvailableQueue->unscheduledNode(SU);
}

/// After backtracking, the hazard checker needs to be restored to a state
/// corresponding to the current cycle.
void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
  HazardRec->Reset();

  unsigned LookAhead = std::min((unsigned)Sequence.size(),
                                HazardRec->getMaxLookAhead());
  if (LookAhead == 0)
    return;

  std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
  unsigned HazardCycle = (*I)->getHeight();
  for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
    SUnit *SU = *I;
    for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
      HazardRec->RecedeCycle();
    }
    EmitNode(SU);
  }
}

/// BacktrackBottomUp - Backtrack scheduling to a previous cycle by
/// unscheduling nodes from the bottom of the sequence until BtSU is
/// unscheduled, in order to make it possible to schedule SU.
void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
  SUnit *OldSU = Sequence.back();
  while (true) {
    Sequence.pop_back();
    if (SU->isSucc(OldSU))
      // Don't try to remove SU from AvailableQueue.
      SU->isAvailable = false;
    // FIXME: use ready cycle instead of height
    CurCycle = OldSU->getHeight();
    UnscheduleNodeBottomUp(OldSU);
    AvailableQueue->setCurCycle(CurCycle);
    if (OldSU == BtSU)
      break;
    OldSU = Sequence.back();
  }

  assert(!SU->isSucc(OldSU) && "Something is wrong!");

  RestoreHazardCheckerBottomUp();

  ReleasePending();

  ++NumBacktracks;
}

static bool isOperandOf(const SUnit *SU, SDNode *N) {
  for (const SDNode *SUNode = SU->getNode(); SUNode;
       SUNode = SUNode->getGluedNode()) {
    if (SUNode->isOperandOf(N))
      return true;
  }
  return false;
}

/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
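/// For a load-folding instruction this first tries to unfold the memory
/// operand into a separate load (the TryUnfold path below); only if the node
/// cannot be unfolded does it fall back to duplicating the node outright.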
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
  SDNode *N = SU->getNode();
  if (!N)
    return NULL;

  if (SU->getNode()->getGluedNode())
    return NULL;

  SUnit *NewSU;
  bool TryUnfold = false;
  for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue)
      return NULL;
    else if (VT == MVT::Other)
      TryUnfold = true;
  }
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    const SDValue &Op = N->getOperand(i);
    EVT VT = Op.getNode()->getValueType(Op.getResNo());
    if (VT == MVT::Glue)
      return NULL;
  }

  if (TryUnfold) {
    SmallVector<SDNode*, 2> NewNodes;
    if (!TII->unfoldMemoryOperand(*DAG, N, NewNodes))
      return NULL;

    // Unfolding an x86 DEC64m operation results in a store, dec, and load,
    // which can't be handled here, so quit.
    if (NewNodes.size() == 3)
      return NULL;

    DEBUG(dbgs() << "Unfolding SU #" << SU->NodeNum << "\n");
    assert(NewNodes.size() == 2 && "Expected a load folding node!");

    N = NewNodes[1];
    SDNode *LoadNode = NewNodes[0];
    unsigned NumVals = N->getNumValues();
    unsigned OldNumVals = SU->getNode()->getNumValues();
    for (unsigned i = 0; i != NumVals; ++i)
      DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), i), SDValue(N, i));
    DAG->ReplaceAllUsesOfValueWith(SDValue(SU->getNode(), OldNumVals-1),
                                   SDValue(LoadNode, 1));

    // LoadNode may already exist. This can happen when there is another
    // load from the same location and producing the same type of value
    // but it has different alignment or volatility.
    bool isNewLoad = true;
    SUnit *LoadSU;
    if (LoadNode->getNodeId() != -1) {
      LoadSU = &SUnits[LoadNode->getNodeId()];
      isNewLoad = false;
    } else {
      LoadSU = CreateNewSUnit(LoadNode);
      LoadNode->setNodeId(LoadSU->NodeNum);

      InitNumRegDefsLeft(LoadSU);
      computeLatency(LoadSU);
    }

    NewSU = CreateNewSUnit(N);
    assert(N->getNodeId() == -1 && "Node already inserted!");
    N->setNodeId(NewSU->NodeNum);

    const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
    for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
      if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
        NewSU->isTwoAddress = true;
        break;
      }
    }
    if (MCID.isCommutable())
      NewSU->isCommutable = true;

    InitNumRegDefsLeft(NewSU);
    computeLatency(NewSU);

    // Record all the edges to and from the old SU, by category.
    SmallVector<SDep, 4> ChainPreds;
    SmallVector<SDep, 4> ChainSuccs;
    SmallVector<SDep, 4> LoadPreds;
    SmallVector<SDep, 4> NodePreds;
    SmallVector<SDep, 4> NodeSuccs;
    for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainPreds.push_back(*I);
      else if (isOperandOf(I->getSUnit(), LoadNode))
        LoadPreds.push_back(*I);
      else
        NodePreds.push_back(*I);
    }
    for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
         I != E; ++I) {
      if (I->isCtrl())
        ChainSuccs.push_back(*I);
      else
        NodeSuccs.push_back(*I);
    }

    // Now assign edges to the newly-created nodes.
    for (unsigned i = 0, e = ChainPreds.size(); i != e; ++i) {
      const SDep &Pred = ChainPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = LoadPreds.size(); i != e; ++i) {
      const SDep &Pred = LoadPreds[i];
      RemovePred(SU, Pred);
      if (isNewLoad)
        AddPred(LoadSU, Pred);
    }
    for (unsigned i = 0, e = NodePreds.size(); i != e; ++i) {
      const SDep &Pred = NodePreds[i];
      RemovePred(SU, Pred);
      AddPred(NewSU, Pred);
    }
    for (unsigned i = 0, e = NodeSuccs.size(); i != e; ++i) {
      SDep D = NodeSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      D.setSUnit(NewSU);
      AddPred(SuccDep, D);
      // Balance register pressure.
      if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
          && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
        --NewSU->NumRegDefsLeft;
    }
    for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
      SDep D = ChainSuccs[i];
      SUnit *SuccDep = D.getSUnit();
      D.setSUnit(SU);
      RemovePred(SuccDep, D);
      if (isNewLoad) {
        D.setSUnit(LoadSU);
        AddPred(SuccDep, D);
      }
    }

    // Add a data dependency to reflect that NewSU reads the value defined
    // by LoadSU.
    AddPred(NewSU, SDep(LoadSU, SDep::Data, LoadSU->Latency));

    if (isNewLoad)
      AvailableQueue->addNode(LoadSU);
    AvailableQueue->addNode(NewSU);

    ++NumUnfolds;

    if (NewSU->NumSuccsLeft == 0) {
      NewSU->isAvailable = true;
      return NewSU;
    }
    SU = NewSU;
  }

  DEBUG(dbgs() << " Duplicating SU #" << SU->NodeNum << "\n");
  NewSU = CreateClone(SU);

  // New SUnit has the exact same predecessors.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I)
    if (!I->isArtificial())
      AddPred(NewSU, *I);

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(NewSU);
      AddPred(SuccSU, D);
      D.setSUnit(SU);
      DelDeps.push_back(std::make_pair(SuccSU, D));
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(NewSU);

  ++NumDups;
  return NewSU;
}

/// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                                 const TargetRegisterClass *DestRC,
                                                 const TargetRegisterClass *SrcRC,
                                                 SmallVector<SUnit*, 2> &Copies) {
  SUnit *CopyFromSU = CreateNewSUnit(NULL);
  CopyFromSU->CopySrcRC = SrcRC;
  CopyFromSU->CopyDstRC = DestRC;

  SUnit *CopyToSU = CreateNewSUnit(NULL);
  CopyToSU->CopySrcRC = DestRC;
  CopyToSU->CopyDstRC = SrcRC;

  // Only copy scheduled successors. Cut them from old node's successor
  // list and move them over.
  SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
  for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
       I != E; ++I) {
    if (I->isArtificial())
      continue;
    SUnit *SuccSU = I->getSUnit();
    if (SuccSU->isScheduled) {
      SDep D = *I;
      D.setSUnit(CopyToSU);
      AddPred(SuccSU, D);
      DelDeps.push_back(std::make_pair(SuccSU, *I));
    }
    else {
      // Avoid scheduling the def-side copy before other successors. Otherwise
      // we could introduce another physreg interference on the copy and
      // continue inserting copies indefinitely.
      SDep D(CopyFromSU, SDep::Order, /*Latency=*/0,
             /*Reg=*/0, /*isNormalMemory=*/false,
             /*isMustAlias=*/false, /*isArtificial=*/true);
      AddPred(SuccSU, D);
    }
  }
  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
    RemovePred(DelDeps[i].first, DelDeps[i].second);

  AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
  AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));

  AvailableQueue->updateNode(SU);
  AvailableQueue->addNode(CopyFromSU);
  AvailableQueue->addNode(CopyToSU);
  Copies.push_back(CopyFromSU);
  Copies.push_back(CopyToSU);

  ++NumPRCopies;
}

/// getPhysicalRegisterVT - Returns the ValueType of the physical register
/// definition of the specified node.
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
                                 const TargetInstrInfo *TII) {
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
  assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
  unsigned NumRes = MCID.getNumDefs();
  for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
    if (Reg == *ImpDef)
      break;
    ++NumRes;
  }
  return N->getValueType(NumRes);
}

/// CheckForLiveRegDef - If the specified register def of the specified SUnit
/// clobbers any "live" registers, add them to the LRegs interference list.
static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
                               std::vector<SUnit*> &LiveRegDefs,
                               SmallSet<unsigned, 4> &RegAdded,
                               SmallVector<unsigned, 4> &LRegs,
                               const TargetRegisterInfo *TRI) {
  for (const uint16_t *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {

    // Check if Reg is live.
    if (!LiveRegDefs[*AliasI]) continue;

    // Allow multiple uses of the same def.
    if (LiveRegDefs[*AliasI] == SU) continue;

    // Add Reg to the set of interfering live regs.
    if (RegAdded.insert(*AliasI)) {
      LRegs.push_back(*AliasI);
    }
  }
}

/// CheckForLiveRegDefMasked - Check for any live physregs that are clobbered
/// by RegMask, and add them to LRegs.
static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask,
                                     std::vector<SUnit*> &LiveRegDefs,
                                     SmallSet<unsigned, 4> &RegAdded,
                                     SmallVector<unsigned, 4> &LRegs) {
  // Look at all live registers. Skip Reg0 and the special CallResource.
  for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) {
    if (!LiveRegDefs[i]) continue;
    if (LiveRegDefs[i] == SU) continue;
    if (!MachineOperand::clobbersPhysReg(RegMask, i)) continue;
    if (RegAdded.insert(i))
      LRegs.push_back(i);
  }
}

/// getNodeRegMask - Returns the register mask attached to an SDNode, if any.
static const uint32_t *getNodeRegMask(const SDNode *N) {
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    if (const RegisterMaskSDNode *Op =
        dyn_cast<RegisterMaskSDNode>(N->getOperand(i).getNode()))
      return Op->getRegMask();
  return NULL;
}

/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specified node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
bool ScheduleDAGRRList::
DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
  if (NumLiveRegs == 0)
    return false;

  SmallSet<unsigned, 4> RegAdded;
  // If this node would clobber any "live" register, then it's not ready.
  //
  // If SU is the currently live definition of the same register that it uses,
  // then we are free to schedule it.
  for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
       I != E; ++I) {
    if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
      CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
                         RegAdded, LRegs, TRI);
  }

  for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
    if (Node->getOpcode() == ISD::INLINEASM) {
      // Inline asm can clobber physical defs.
      unsigned NumOps = Node->getNumOperands();
      if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
        --NumOps; // Ignore the glue operand.

      for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
        unsigned Flags =
          cast<ConstantSDNode>(Node->getOperand(i))->getZExtValue();
        unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);

        ++i; // Skip the ID value.
        if (InlineAsm::isRegDefKind(Flags) ||
            InlineAsm::isRegDefEarlyClobberKind(Flags) ||
            InlineAsm::isClobberKind(Flags)) {
          // Check for def of register or earlyclobber register.
          for (; NumVals; --NumVals, ++i) {
            unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
            if (TargetRegisterInfo::isPhysicalRegister(Reg))
              CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
          }
        } else
          i += NumVals;
      }
      continue;
    }

    if (!Node->isMachineOpcode())
      continue;
    // If we're in the middle of scheduling a call, don't begin scheduling
    // another call. Also, don't allow any physical registers to be live across
    // the call.
    if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) {
      // Check the special calling-sequence resource.
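      // A live call resource means another call sequence is in flight. Unless
      // this CALLSEQ_END is chain-reachable from that sequence's generating
      // node (i.e. the calls are properly nested), report the resource as
      // interfering so the two calls cannot be interleaved.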
      unsigned CallResource = TRI->getNumRegs();
      if (LiveRegDefs[CallResource]) {
        SDNode *Gen = LiveRegGens[CallResource]->getNode();
        while (SDNode *Glued = Gen->getGluedNode())
          Gen = Glued;
        if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource))
          LRegs.push_back(CallResource);
      }
    }
    if (const uint32_t *RegMask = getNodeRegMask(Node))
      CheckForLiveRegDefMasked(SU, RegMask, LiveRegDefs, RegAdded, LRegs);

    const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
    if (!MCID.ImplicitDefs)
      continue;
    for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg)
      CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
  }

  return !LRegs.empty();
}

/// Return a node that can be scheduled in this cycle. Requirements:
/// (1) Ready: latency has been satisfied
/// (2) No Hazards: resources are available
/// (3) No Interferences: may unschedule to break register interferences.
SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
  SmallVector<SUnit*, 4> Interferences;
  DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;

  SUnit *CurSU = AvailableQueue->pop();
  while (CurSU) {
    SmallVector<unsigned, 4> LRegs;
    if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
      break;
    LRegsMap.insert(std::make_pair(CurSU, LRegs));

    CurSU->isPending = true; // This SU is not in AvailableQueue right now.
    Interferences.push_back(CurSU);
    CurSU = AvailableQueue->pop();
  }
  if (CurSU) {
    // Add the nodes that aren't ready back onto the available list.
    for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
      Interferences[i]->isPending = false;
      assert(Interferences[i]->isAvailable && "must still be available");
      AvailableQueue->push(Interferences[i]);
    }
    return CurSU;
  }

  // All candidates are delayed due to live physical reg dependencies.
  // Try backtracking, code duplication, or inserting cross class copies
  // to resolve it.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    SUnit *TrySU = Interferences[i];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];

    // Try unscheduling up to the point where it's safe to schedule
    // this node.
    SUnit *BtSU = NULL;
    unsigned LiveCycle = UINT_MAX;
    for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
      unsigned Reg = LRegs[j];
      if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
        BtSU = LiveRegGens[Reg];
        LiveCycle = BtSU->getHeight();
      }
    }
    if (!WillCreateCycle(TrySU, BtSU)) {
      BacktrackBottomUp(TrySU, BtSU);

      // Force the current node to be scheduled before the node that
      // requires the physical reg dep.
      if (BtSU->isAvailable) {
        BtSU->isAvailable = false;
        if (!BtSU->isPending)
          AvailableQueue->remove(BtSU);
      }
      AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false, /*isArtificial=*/true));

      // If one or more successors has been unscheduled, then the current
      // node is no longer available. Schedule a successor that's now
      // available instead.
      if (!TrySU->isAvailable) {
        CurSU = AvailableQueue->pop();
      }
      else {
        CurSU = TrySU;
        TrySU->isPending = false;
        Interferences.erase(Interferences.begin()+i);
      }
      break;
    }
  }

  if (!CurSU) {
    // Can't backtrack. If it's too expensive to copy the value, then try
    // duplicating the nodes that produce these "too expensive to copy"
    // values to break the dependency. In case even that doesn't work,
    // insert cross class copies.
    // If it's not too expensive, i.e. cost != -1, issue copies.
    SUnit *TrySU = Interferences[0];
    SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
    assert(LRegs.size() == 1 && "Can't handle this yet!");
    unsigned Reg = LRegs[0];
    SUnit *LRDef = LiveRegDefs[Reg];
    EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
    const TargetRegisterClass *RC =
      TRI->getMinimalPhysRegClass(Reg, VT);
    const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);

    // If the cross copy register class is the same as RC, then it must be
    // possible to copy the value directly. Do not try to duplicate the def.
    // If the cross copy register class is not the same as RC, then it's
    // possible to copy the value but it requires cross register class copies
    // and it is expensive.
    // If the cross copy register class is null, then it's not possible to
    // copy the value at all.
    SUnit *NewDef = 0;
    if (DestRC != RC) {
      NewDef = CopyAndMoveSuccessors(LRDef);
      if (!DestRC && !NewDef)
        report_fatal_error("Can't handle live physical register dependency!");
    }
    if (!NewDef) {
      // Issue copies, these can be expensive cross register class copies.
      SmallVector<SUnit*, 2> Copies;
      InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
      DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
            << " to SU #" << Copies.front()->NodeNum << "\n");
      AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
                          /*Reg=*/0, /*isNormalMemory=*/false,
                          /*isMustAlias=*/false,
                          /*isArtificial=*/true));
      NewDef = Copies.back();
    }

    DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
          << " to SU #" << TrySU->NodeNum << "\n");
    LiveRegDefs[Reg] = NewDef;
    AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
                         /*Reg=*/0, /*isNormalMemory=*/false,
                         /*isMustAlias=*/false,
                         /*isArtificial=*/true));
    TrySU->isAvailable = false;
    CurSU = NewDef;
  }

  assert(CurSU && "Unable to resolve live physical register dependencies!");

  // Add the nodes that aren't ready back onto the available list.
  for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
    Interferences[i]->isPending = false;
    // May no longer be available due to backtracking.
    if (Interferences[i]->isAvailable) {
      AvailableQueue->push(Interferences[i]);
    }
  }
  return CurSU;
}

/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
  // Release any predecessors of the special Exit node.
  ReleasePredecessors(&ExitSU);

  // Add root to Available queue.
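  // (Bottom-up scheduling works from the DAG root, which has no successors,
  // toward the leaves; the emitted Sequence is reversed at the end.)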
  if (!SUnits.empty()) {
    SUnit *RootSU = &SUnits[DAG->getRoot().getNode()->getNodeId()];
    assert(RootSU->Succs.empty() && "Graph root shouldn't have successors!");
    RootSU->isAvailable = true;
    AvailableQueue->push(RootSU);
  }

  // While Available queue is not empty, grab the node with the highest
  // priority. If it is not ready put it back. Schedule the node.
  Sequence.reserve(SUnits.size());
  while (!AvailableQueue->empty()) {
    DEBUG(dbgs() << "\nExamining Available:\n";
          AvailableQueue->dump(this));

    // Pick the best node to schedule taking all constraints into
    // consideration.
    SUnit *SU = PickNodeToScheduleBottomUp();

    AdvancePastStalls(SU);

    ScheduleNodeBottomUp(SU);

    while (AvailableQueue->empty() && !PendingQueue.empty()) {
      // Advance the cycle to free resources. Skip ahead to the next ready SU.
      assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
      AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
    }
  }

  // Reverse the order if it is bottom up.
  std::reverse(Sequence.begin(), Sequence.end());

#ifndef NDEBUG
  VerifyScheduledSequence(/*isBottomUp=*/true);
#endif
}

//===----------------------------------------------------------------------===//
//                RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi-Ullman numbers
// to reduce register pressure.
//
namespace {
class RegReductionPQBase;

struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
  bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};

#ifndef NDEBUG
template<class SF>
struct reverse_sort : public queue_sort {
  SF &SortFunc;
  reverse_sort(SF &sf) : SortFunc(sf) {}
  reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}

  bool operator()(SUnit* left, SUnit* right) const {
    // reverse left/right rather than simply !SortFunc(left, right)
    // to expose different paths in the comparison logic.
    return SortFunc(right, left);
  }
};
#endif // NDEBUG

/// bu_ls_rr_sort - Priority function for bottom-up register pressure
/// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
  bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// src_ls_rr_sort - Priority function for source order scheduler.
struct src_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  src_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  src_ls_rr_sort(const src_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool operator()(SUnit* left, SUnit* right) const;
};

// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
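// Note: unlike the two sorts above, hybrid and ILP also declare a cycle-based
// isReady check, though with HasReadyFilter currently false the queue's ready
// filter stays disabled and only the comparison operator is used.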
struct hybrid_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  hybrid_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
// scheduler.
struct ilp_ls_rr_sort : public queue_sort {
  enum {
    IsBottomUp = true,
    HasReadyFilter = false
  };

  RegReductionPQBase *SPQ;
  ilp_ls_rr_sort(RegReductionPQBase *spq)
    : SPQ(spq) {}
  ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
    : SPQ(RHS.SPQ) {}

  bool isReady(SUnit *SU, unsigned CurCycle) const;

  bool operator()(SUnit* left, SUnit* right) const;
};

class RegReductionPQBase : public SchedulingPriorityQueue {
protected:
  std::vector<SUnit*> Queue;
  unsigned CurQueueId;
  bool TracksRegPressure;
  bool SrcOrder;

  // SUnits - The SUnits for the current graph.
  std::vector<SUnit> *SUnits;

  MachineFunction &MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const TargetLowering *TLI;
  ScheduleDAGRRList *scheduleDAG;

  // SethiUllmanNumbers - The SethiUllman number for each node.
  std::vector<unsigned> SethiUllmanNumbers;

  /// RegPressure - Tracking current reg pressure per register class.
  ///
  std::vector<unsigned> RegPressure;

  /// RegLimit - Tracking the number of allocatable registers per register
  /// class.
  std::vector<unsigned> RegLimit;

public:
  RegReductionPQBase(MachineFunction &mf,
                     bool hasReadyFilter,
                     bool tracksrp,
                     bool srcorder,
                     const TargetInstrInfo *tii,
                     const TargetRegisterInfo *tri,
                     const TargetLowering *tli)
    : SchedulingPriorityQueue(hasReadyFilter),
      CurQueueId(0), TracksRegPressure(tracksrp), SrcOrder(srcorder),
      MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
    if (TracksRegPressure) {
      unsigned NumRC = TRI->getNumRegClasses();
      RegLimit.resize(NumRC);
      RegPressure.resize(NumRC);
      std::fill(RegLimit.begin(), RegLimit.end(), 0);
      std::fill(RegPressure.begin(), RegPressure.end(), 0);
      for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
             E = TRI->regclass_end(); I != E; ++I)
        RegLimit[(*I)->getID()] = tri->getRegPressureLimit(*I, MF);
    }
  }

  void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
    scheduleDAG = scheduleDag;
  }

  ScheduleHazardRecognizer* getHazardRec() {
    return scheduleDAG->getHazardRec();
  }

  void initNodes(std::vector<SUnit> &sunits);

  void addNode(const SUnit *SU);

  void updateNode(const SUnit *SU);

  void releaseState() {
    SUnits = 0;
    SethiUllmanNumbers.clear();
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
  }

  unsigned getNodePriority(const SUnit *SU) const;

  unsigned getNodeOrdering(const SUnit *SU) const {
    if (!SU->getNode()) return 0;

    return scheduleDAG->DAG->GetOrdering(SU->getNode());
  }

  bool empty() const { return Queue.empty(); }

  void push(SUnit *U) {
    assert(!U->NodeQueueId && "Node in the queue already");
    U->NodeQueueId = ++CurQueueId;
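    // CurQueueId only grows, so NodeQueueId doubles as a stable tie-breaker
    // among otherwise equal-priority nodes (and 0 means "not in the queue").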
Queue.push_back(U); 1669 } 1670 1671 void remove(SUnit *SU) { 1672 assert(!Queue.empty() && "Queue is empty!"); 1673 assert(SU->NodeQueueId != 0 && "Not in queue!"); 1674 std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), 1675 SU); 1676 if (I != prior(Queue.end())) 1677 std::swap(*I, Queue.back()); 1678 Queue.pop_back(); 1679 SU->NodeQueueId = 0; 1680 } 1681 1682 bool tracksRegPressure() const { return TracksRegPressure; } 1683 1684 void dumpRegPressure() const; 1685 1686 bool HighRegPressure(const SUnit *SU) const; 1687 1688 bool MayReduceRegPressure(SUnit *SU) const; 1689 1690 int RegPressureDiff(SUnit *SU, unsigned &LiveUses) const; 1691 1692 void scheduledNode(SUnit *SU); 1693 1694 void unscheduledNode(SUnit *SU); 1695 1696 protected: 1697 bool canClobber(const SUnit *SU, const SUnit *Op); 1698 void AddPseudoTwoAddrDeps(); 1699 void PrescheduleNodesWithMultipleUses(); 1700 void CalculateSethiUllmanNumbers(); 1701 }; 1702 1703 template<class SF> 1704 static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) { 1705 std::vector<SUnit *>::iterator Best = Q.begin(); 1706 for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()), 1707 E = Q.end(); I != E; ++I) 1708 if (Picker(*Best, *I)) 1709 Best = I; 1710 SUnit *V = *Best; 1711 if (Best != prior(Q.end())) 1712 std::swap(*Best, Q.back()); 1713 Q.pop_back(); 1714 return V; 1715 } 1716 1717 template<class SF> 1718 SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) { 1719 #ifndef NDEBUG 1720 if (DAG->StressSched) { 1721 reverse_sort<SF> RPicker(Picker); 1722 return popFromQueueImpl(Q, RPicker); 1723 } 1724 #endif 1725 (void)DAG; 1726 return popFromQueueImpl(Q, Picker); 1727 } 1728 1729 template<class SF> 1730 class RegReductionPriorityQueue : public RegReductionPQBase { 1731 SF Picker; 1732 1733 public: 1734 RegReductionPriorityQueue(MachineFunction &mf, 1735 bool tracksrp, 1736 bool srcorder, 1737 const TargetInstrInfo *tii, 1738 const TargetRegisterInfo *tri, 1739 const TargetLowering *tli) 1740 : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, srcorder, 1741 tii, tri, tli), 1742 Picker(this) {} 1743 1744 bool isBottomUp() const { return SF::IsBottomUp; } 1745 1746 bool isReady(SUnit *U) const { 1747 return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle()); 1748 } 1749 1750 SUnit *pop() { 1751 if (Queue.empty()) return NULL; 1752 1753 SUnit *V = popFromQueue(Queue, Picker, scheduleDAG); 1754 V->NodeQueueId = 0; 1755 return V; 1756 } 1757 1758 void dump(ScheduleDAG *DAG) const { 1759 // Emulate pop() without clobbering NodeQueueIds. 
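// Popping from copies of the queue and the picker keeps dump() free of side
// effects on the real scheduling state.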
1760 std::vector<SUnit*> DumpQueue = Queue; 1761 SF DumpPicker = Picker; 1762 while (!DumpQueue.empty()) { 1763 SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); 1764 dbgs() << "Height " << SU->getHeight() << ": "; 1765 SU->dump(DAG); 1766 } 1767 } 1768 }; 1769 1770 typedef RegReductionPriorityQueue<bu_ls_rr_sort> 1771 BURegReductionPriorityQueue; 1772 1773 typedef RegReductionPriorityQueue<src_ls_rr_sort> 1774 SrcRegReductionPriorityQueue; 1775 1776 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort> 1777 HybridBURRPriorityQueue; 1778 1779 typedef RegReductionPriorityQueue<ilp_ls_rr_sort> 1780 ILPBURRPriorityQueue; 1781 } // end anonymous namespace 1782 1783 //===----------------------------------------------------------------------===// 1784 // Static Node Priority for Register Pressure Reduction 1785 //===----------------------------------------------------------------------===// 1786 1787 // Check for special nodes that bypass scheduling heuristics. 1788 // Currently this pushes TokenFactor nodes down, but may be used for other 1789 // pseudo-ops as well. 1790 // 1791 // Return -1 to schedule right above left, 1 for left above right. 1792 // Return 0 if no bias exists. 1793 static int checkSpecialNodes(const SUnit *left, const SUnit *right) { 1794 bool LSchedLow = left->isScheduleLow; 1795 bool RSchedLow = right->isScheduleLow; 1796 if (LSchedLow != RSchedLow) 1797 return LSchedLow < RSchedLow ? 1 : -1; 1798 return 0; 1799 } 1800 1801 /// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number. 1802 /// Smaller number is the higher priority. 1803 static unsigned 1804 CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) { 1805 unsigned &SethiUllmanNumber = SUNumbers[SU->NodeNum]; 1806 if (SethiUllmanNumber != 0) 1807 return SethiUllmanNumber; 1808 1809 unsigned Extra = 0; 1810 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 1811 I != E; ++I) { 1812 if (I->isCtrl()) continue; // ignore chain preds 1813 SUnit *PredSU = I->getSUnit(); 1814 unsigned PredSethiUllman = CalcNodeSethiUllmanNumber(PredSU, SUNumbers); 1815 if (PredSethiUllman > SethiUllmanNumber) { 1816 SethiUllmanNumber = PredSethiUllman; 1817 Extra = 0; 1818 } else if (PredSethiUllman == SethiUllmanNumber) 1819 ++Extra; 1820 } 1821 1822 SethiUllmanNumber += Extra; 1823 1824 if (SethiUllmanNumber == 0) 1825 SethiUllmanNumber = 1; 1826 1827 return SethiUllmanNumber; 1828 } 1829 1830 /// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all 1831 /// scheduling units. 1832 void RegReductionPQBase::CalculateSethiUllmanNumbers() { 1833 SethiUllmanNumbers.assign(SUnits->size(), 0); 1834 1835 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) 1836 CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers); 1837 } 1838 1839 void RegReductionPQBase::addNode(const SUnit *SU) { 1840 unsigned SUSize = SethiUllmanNumbers.size(); 1841 if (SUnits->size() > SUSize) 1842 SethiUllmanNumbers.resize(SUSize*2, 0); 1843 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1844 } 1845 1846 void RegReductionPQBase::updateNode(const SUnit *SU) { 1847 SethiUllmanNumbers[SU->NodeNum] = 0; 1848 CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers); 1849 } 1850 1851 // Lower priority means schedule further down. For bottom-up scheduling, lower 1852 // priority SUs are scheduled before higher priority SUs. 1853 unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const { 1854 assert(SU->NodeNum < SethiUllmanNumbers.size()); 1855 unsigned Opc = SU->getNode() ? 
SU->getNode()->getOpcode() : 0;
1856 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
1857 // CopyToReg should be close to its uses to facilitate coalescing and
1858 // avoid spilling.
1859 return 0;
1860 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
1861 Opc == TargetOpcode::SUBREG_TO_REG ||
1862 Opc == TargetOpcode::INSERT_SUBREG)
1863 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
1864 // close to their uses to facilitate coalescing.
1865 return 0;
1866 if (SU->NumSuccs == 0 && SU->NumPreds != 0)
1867 // If SU does not have a register use, i.e. it doesn't produce a value
1868 // that would be consumed (e.g. store), then it terminates a chain of
1869 // computation. Give it a large SethiUllman number so it will be
1870 // scheduled right before its predecessors, so that it doesn't lengthen
1871 // their live ranges.
1872 return 0xffff;
1873 if (SU->NumPreds == 0 && SU->NumSuccs != 0)
1874 // If SU does not have a register def, schedule it close to its uses
1875 // because it does not lengthen any live ranges.
1876 return 0;
1877 #if 1
1878 return SethiUllmanNumbers[SU->NodeNum];
1879 #else
1880 unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
1881 if (SU->isCallOp) {
1882 // FIXME: This assumes all of the defs are used as call operands.
1883 int NP = (int)Priority - SU->getNode()->getNumValues();
1884 return (NP > 0) ? NP : 0;
1885 }
1886 return Priority;
1887 #endif
1888 }
1889
1890 //===----------------------------------------------------------------------===//
1891 // Register Pressure Tracking
1892 //===----------------------------------------------------------------------===//
1893
1894 void RegReductionPQBase::dumpRegPressure() const {
1895 for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
1896 E = TRI->regclass_end(); I != E; ++I) {
1897 const TargetRegisterClass *RC = *I;
1898 unsigned Id = RC->getID();
1899 unsigned RP = RegPressure[Id];
1900 if (!RP) continue;
1901 DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
1902 << '\n');
1903 }
1904 }
1905
1906 bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
1907 if (!TLI)
1908 return false;
1909
1910 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1911 I != E; ++I) {
1912 if (I->isCtrl())
1913 continue;
1914 SUnit *PredSU = I->getSUnit();
1915 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1916 // to cover the number of registers defined (they are all live).
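// Such predecessors contribute no additional pressure when SU is scheduled,
// so they can be skipped.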
1917 if (PredSU->NumRegDefsLeft == 0) {
1918 continue;
1919 }
1920 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1921 RegDefPos.IsValid(); RegDefPos.Advance()) {
1922 unsigned RCId, Cost;
1923 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
1924
1925 if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
1926 return true;
1927 }
1928 }
1929 return false;
1930 }
1931
1932 bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const {
1933 const SDNode *N = SU->getNode();
1934
1935 if (!N->isMachineOpcode() || !SU->NumSuccs)
1936 return false;
1937
1938 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1939 for (unsigned i = 0; i != NumDefs; ++i) {
1940 EVT VT = N->getValueType(i);
1941 if (!N->hasAnyUseOfValue(i))
1942 continue;
1943 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1944 if (RegPressure[RCId] >= RegLimit[RCId])
1945 return true;
1946 }
1947 return false;
1948 }
1949
1950 // Compute the register pressure contribution of this instruction by counting
1951 // up for uses that are not live and down for defs. Only count register classes
1952 // that are already under high pressure. As a side effect, compute the number of
1953 // uses of registers that are already live.
1954 //
1955 // FIXME: This encompasses the logic in HighRegPressure and MayReduceRegPressure
1956 // so it could probably be factored.
1957 int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const {
1958 LiveUses = 0;
1959 int PDiff = 0;
1960 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
1961 I != E; ++I) {
1962 if (I->isCtrl())
1963 continue;
1964 SUnit *PredSU = I->getSUnit();
1965 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
1966 // to cover the number of registers defined (they are all live).
1967 if (PredSU->NumRegDefsLeft == 0) {
1968 if (PredSU->getNode()->isMachineOpcode())
1969 ++LiveUses;
1970 continue;
1971 }
1972 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
1973 RegDefPos.IsValid(); RegDefPos.Advance()) {
1974 EVT VT = RegDefPos.GetValue();
1975 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1976 if (RegPressure[RCId] >= RegLimit[RCId])
1977 ++PDiff;
1978 }
1979 }
1980 const SDNode *N = SU->getNode();
1981
1982 if (!N || !N->isMachineOpcode() || !SU->NumSuccs)
1983 return PDiff;
1984
1985 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
1986 for (unsigned i = 0; i != NumDefs; ++i) {
1987 EVT VT = N->getValueType(i);
1988 if (!N->hasAnyUseOfValue(i))
1989 continue;
1990 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
1991 if (RegPressure[RCId] >= RegLimit[RCId])
1992 --PDiff;
1993 }
1994 return PDiff;
1995 }
1996
1997 void RegReductionPQBase::scheduledNode(SUnit *SU) {
1998 if (!TracksRegPressure)
1999 return;
2000
2001 if (!SU->getNode())
2002 return;
2003
2004 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2005 I != E; ++I) {
2006 if (I->isCtrl())
2007 continue;
2008 SUnit *PredSU = I->getSUnit();
2009 // NumRegDefsLeft is zero when enough uses of this node have been scheduled
2010 // to cover the number of registers defined (they are all live).
2011 if (PredSU->NumRegDefsLeft == 0) {
2012 continue;
2013 }
2014 // FIXME: The ScheduleDAG currently loses information about which of a
2015 // node's values is consumed by each dependence. Consequently, if the node
2016 // defines multiple register classes, we don't know which to pressurize
2017 // here. Instead the following loop consumes the register defs in an
2018 // arbitrary order. At least it handles the common case of clustered loads
2019 // to the same class. For precise liveness, each SDep needs to indicate the
2020 // result number. But that tightly couples the ScheduleDAG with the
2021 // SelectionDAG making updates tricky. A simpler hack would be to attach a
2022 // value type or register class to SDep.
2023 //
2024 // The most important aspect of register tracking is balancing the increase
2025 // here with the reduction further below. Note that this SU may use multiple
2026 // defs in PredSU. They can't be determined here, but we've already
2027 // compensated by reducing NumRegDefsLeft in PredSU during
2028 // ScheduleDAGSDNodes::AddSchedEdges.
2029 --PredSU->NumRegDefsLeft;
2030 unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
2031 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
2032 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2033 if (SkipRegDefs)
2034 continue;
2035
2036 unsigned RCId, Cost;
2037 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
2038 RegPressure[RCId] += Cost;
2039 break;
2040 }
2041 }
2042
2043 // We should have this assert, but there may be dead SDNodes that never
2044 // materialize as SUnits, so they don't appear to generate liveness.
2045 //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
2046 int SkipRegDefs = (int)SU->NumRegDefsLeft;
2047 for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
2048 RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
2049 if (SkipRegDefs > 0)
2050 continue;
2051 unsigned RCId, Cost;
2052 GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
2053 if (RegPressure[RCId] < Cost) {
2054 // Register pressure tracking is imprecise. This can happen. But we try
2055 // hard not to let it happen because it likely results in poor scheduling.
2056 DEBUG(dbgs() << "  SU(" << SU->NodeNum << ") has too many regdefs\n");
2057 RegPressure[RCId] = 0;
2058 }
2059 else {
2060 RegPressure[RCId] -= Cost;
2061 }
2062 }
2063 dumpRegPressure();
2064 }
2065
2066 void RegReductionPQBase::unscheduledNode(SUnit *SU) {
2067 if (!TracksRegPressure)
2068 return;
2069
2070 const SDNode *N = SU->getNode();
2071 if (!N) return;
2072
2073 if (!N->isMachineOpcode()) {
2074 if (N->getOpcode() != ISD::CopyToReg)
2075 return;
2076 } else {
2077 unsigned Opc = N->getMachineOpcode();
2078 if (Opc == TargetOpcode::EXTRACT_SUBREG ||
2079 Opc == TargetOpcode::INSERT_SUBREG ||
2080 Opc == TargetOpcode::SUBREG_TO_REG ||
2081 Opc == TargetOpcode::REG_SEQUENCE ||
2082 Opc == TargetOpcode::IMPLICIT_DEF)
2083 return;
2084 }
2085
2086 for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2087 I != E; ++I) {
2088 if (I->isCtrl())
2089 continue;
2090 SUnit *PredSU = I->getSUnit();
2091 // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
2092 // counts data deps.
2093 if (PredSU->NumSuccsLeft != PredSU->Succs.size())
2094 continue;
2095 const SDNode *PN = PredSU->getNode();
2096 if (!PN->isMachineOpcode()) {
2097 if (PN->getOpcode() == ISD::CopyFromReg) {
2098 EVT VT = PN->getValueType(0);
2099 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2100 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2101 }
2102 continue;
2103 }
2104 unsigned POpc = PN->getMachineOpcode();
2105 if (POpc == TargetOpcode::IMPLICIT_DEF)
2106 continue;
2107 if (POpc == TargetOpcode::EXTRACT_SUBREG ||
2108 POpc == TargetOpcode::INSERT_SUBREG ||
2109 POpc == TargetOpcode::SUBREG_TO_REG) {
2110 EVT VT = PN->getValueType(0);
2111 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2112 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2113 continue;
2114 }
2115 unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
2116 for (unsigned i = 0; i != NumDefs; ++i) {
2117 EVT VT = PN->getValueType(i);
2118 if (!PN->hasAnyUseOfValue(i))
2119 continue;
2120 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2121 if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
2122 // Register pressure tracking is imprecise. This can happen.
2123 RegPressure[RCId] = 0;
2124 else
2125 RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
2126 }
2127 }
2128
2129 // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
2130 // may transfer data dependencies to CopyToReg.
2131 if (SU->NumSuccs && N->isMachineOpcode()) {
2132 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
2133 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
2134 EVT VT = N->getValueType(i);
2135 if (VT == MVT::Glue || VT == MVT::Other)
2136 continue;
2137 if (!N->hasAnyUseOfValue(i))
2138 continue;
2139 unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
2140 RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
2141 }
2142 }
2143
2144 dumpRegPressure();
2145 }
2146
2147 //===----------------------------------------------------------------------===//
2148 // Dynamic Node Priority for Register Pressure Reduction
2149 //===----------------------------------------------------------------------===//
2150
2151 /// closestSucc - Returns the scheduled cycle of the successor which is
2152 /// closest to the current cycle.
2153 static unsigned closestSucc(const SUnit *SU) {
2154 unsigned MaxHeight = 0;
2155 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
2156 I != E; ++I) {
2157 if (I->isCtrl()) continue; // ignore chain succs
2158 unsigned Height = I->getSUnit()->getHeight();
2159 // If there are a bunch of CopyToRegs stacked up, they should be considered
2160 // to be at the same position.
2161 if (I->getSUnit()->getNode() &&
2162 I->getSUnit()->getNode()->getOpcode() == ISD::CopyToReg)
2163 Height = closestSucc(I->getSUnit())+1;
2164 if (Height > MaxHeight)
2165 MaxHeight = Height;
2166 }
2167 return MaxHeight;
2168 }
2169
2170 /// calcMaxScratches - Returns a cost estimate of the worst-case requirement
2171 /// for scratch registers, i.e. number of data dependencies.
2172 static unsigned calcMaxScratches(const SUnit *SU) {
2173 unsigned Scratches = 0;
2174 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2175 I != E; ++I) {
2176 if (I->isCtrl()) continue; // ignore chain preds
2177 Scratches++;
2178 }
2179 return Scratches;
2180 }
2181
2182 /// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
2183 /// CopyFromReg from a virtual register.
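/// Such a node reads only values that are live into the block, which makes
/// it a candidate VRegCycle definition (see initVRegCycle below).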
2184 static bool hasOnlyLiveInOpers(const SUnit *SU) { 2185 bool RetVal = false; 2186 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2187 I != E; ++I) { 2188 if (I->isCtrl()) continue; 2189 const SUnit *PredSU = I->getSUnit(); 2190 if (PredSU->getNode() && 2191 PredSU->getNode()->getOpcode() == ISD::CopyFromReg) { 2192 unsigned Reg = 2193 cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg(); 2194 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2195 RetVal = true; 2196 continue; 2197 } 2198 } 2199 return false; 2200 } 2201 return RetVal; 2202 } 2203 2204 /// hasOnlyLiveOutUses - Return true if SU has only value successors that are 2205 /// CopyToReg to a virtual register. This SU def is probably a liveout and 2206 /// it has no other use. It should be scheduled closer to the terminator. 2207 static bool hasOnlyLiveOutUses(const SUnit *SU) { 2208 bool RetVal = false; 2209 for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); 2210 I != E; ++I) { 2211 if (I->isCtrl()) continue; 2212 const SUnit *SuccSU = I->getSUnit(); 2213 if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { 2214 unsigned Reg = 2215 cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); 2216 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 2217 RetVal = true; 2218 continue; 2219 } 2220 } 2221 return false; 2222 } 2223 return RetVal; 2224 } 2225 2226 // Set isVRegCycle for a node with only live in opers and live out uses. Also 2227 // set isVRegCycle for its CopyFromReg operands. 2228 // 2229 // This is only relevant for single-block loops, in which case the VRegCycle 2230 // node is likely an induction variable in which the operand and target virtual 2231 // registers should be coalesced (e.g. pre/post increment values). Setting the 2232 // isVRegCycle flag helps the scheduler prioritize other uses of the same 2233 // CopyFromReg so that this node becomes the virtual register "kill". This 2234 // avoids interference between the values live in and out of the block and 2235 // eliminates a copy inside the loop. 2236 static void initVRegCycle(SUnit *SU) { 2237 if (DisableSchedVRegCycle) 2238 return; 2239 2240 if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU)) 2241 return; 2242 2243 DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n"); 2244 2245 SU->isVRegCycle = true; 2246 2247 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); 2248 I != E; ++I) { 2249 if (I->isCtrl()) continue; 2250 I->getSUnit()->isVRegCycle = true; 2251 } 2252 } 2253 2254 // After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of 2255 // CopyFromReg operands. We should no longer penalize other uses of this VReg. 2256 static void resetVRegCycle(SUnit *SU) { 2257 if (!SU->isVRegCycle) 2258 return; 2259 2260 for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); 2261 I != E; ++I) { 2262 if (I->isCtrl()) continue; // ignore chain preds 2263 SUnit *PredSU = I->getSUnit(); 2264 if (PredSU->isVRegCycle) { 2265 assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg && 2266 "VRegCycle def must be CopyFromReg"); 2267 I->getSUnit()->isVRegCycle = 0; 2268 } 2269 } 2270 } 2271 2272 // Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This 2273 // means a node that defines the VRegCycle has not been scheduled yet. 2274 static bool hasVRegCycleUse(const SUnit *SU) { 2275 // If this SU also defines the VReg, don't hoist it as a "use". 
2276 if (SU->isVRegCycle)
2277 return false;
2278
2279 for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
2280 I != E; ++I) {
2281 if (I->isCtrl()) continue; // ignore chain preds
2282 if (I->getSUnit()->isVRegCycle &&
2283 I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
2284 DEBUG(dbgs() << "  VReg cycle use: SU (" << SU->NodeNum << ")\n");
2285 return true;
2286 }
2287 }
2288 return false;
2289 }
2290
2291 // Check for either a dependence (latency) or resource (hazard) stall.
2292 //
2293 // Note: The ScheduleHazardRecognizer interface requires a non-const SU.
2294 static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
2295 if ((int)SPQ->getCurCycle() < Height) return true;
2296 if (SPQ->getHazardRec()->getHazardType(SU, 0)
2297 != ScheduleHazardRecognizer::NoHazard)
2298 return true;
2299 return false;
2300 }
2301
2302 // Return -1 if left has higher priority, 1 if right has higher priority.
2303 // Return 0 if latency-based priority is equivalent.
2304 static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
2305 RegReductionPQBase *SPQ) {
2306 // Scheduling an instruction that uses a VReg whose postincrement has not yet
2307 // been scheduled will induce a copy. Model this as an extra cycle of latency.
2308 int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
2309 int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
2310 int LHeight = (int)left->getHeight() + LPenalty;
2311 int RHeight = (int)right->getHeight() + RPenalty;
2312
2313 bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) &&
2314 BUHasStall(left, LHeight, SPQ);
2315 bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) &&
2316 BUHasStall(right, RHeight, SPQ);
2317
2318 // If scheduling one of the nodes will cause a pipeline stall, delay it.
2319 // If scheduling both of the nodes will cause pipeline stalls, sort them
2320 // according to their height.
2321 if (LStall) {
2322 if (!RStall)
2323 return 1;
2324 if (LHeight != RHeight)
2325 return LHeight > RHeight ? 1 : -1;
2326 } else if (RStall)
2327 return -1;
2328
2329 // If either node is scheduling for latency, sort them by height/depth
2330 // and latency.
2331 if (!checkPref || (left->SchedulingPref == Sched::ILP ||
2332 right->SchedulingPref == Sched::ILP)) {
2333 if (DisableSchedCycles) {
2334 if (LHeight != RHeight)
2335 return LHeight > RHeight ? 1 : -1;
2336 }
2337 else {
2338 // If neither instruction stalls (!LStall && !RStall) then height is
2339 // already covered, so only depth matters. We also reach this point if
2340 // both stall but have the same height.
2341 int LDepth = left->getDepth() - LPenalty;
2342 int RDepth = right->getDepth() - RPenalty;
2343 if (LDepth != RDepth) {
2344 DEBUG(dbgs() << "  Comparing latency of SU (" << left->NodeNum
2345 << ") depth " << LDepth << " vs SU (" << right->NodeNum
2346 << ") depth " << RDepth << "\n");
2347 return LDepth < RDepth ? 1 : -1;
2348 }
2349 }
2350 if (left->Latency != right->Latency)
2351 return left->Latency > right->Latency ? 1 : -1;
2352 }
2353 return 0;
2354 }
2355
2356 static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
2357 // Schedule physical register definitions close to their use. This is
2358 // motivated by microarchitectures that can fuse cmp+jump macro-ops. But as
2359 // long as shortening physreg live ranges is generally good, we can defer
2360 // creating a subtarget hook.
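// Illustrative example (not code from this file): on x86-like targets,
// keeping a flags-producing compare adjacent to the branch that consumes
// EFLAGS both shortens the physreg live range and enables cmp+jump
// macro-fusion where the hardware supports it.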
2361 if (!DisableSchedPhysRegJoin) {
2362 bool LHasPhysReg = left->hasPhysRegDefs;
2363 bool RHasPhysReg = right->hasPhysRegDefs;
2364 if (LHasPhysReg != RHasPhysReg) {
2365 #ifndef NDEBUG
2366 const char *PhysRegMsg[] = {" has no physreg", " defines a physreg"};
2367 #endif
2368 DEBUG(dbgs() << "  SU (" << left->NodeNum << ") "
2369 << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") "
2370 << PhysRegMsg[RHasPhysReg] << "\n");
2371 return LHasPhysReg < RHasPhysReg;
2372 }
2373 }
2374
2375 // Prioritize by Sethi-Ullman number and push CopyToReg nodes down.
2376 unsigned LPriority = SPQ->getNodePriority(left);
2377 unsigned RPriority = SPQ->getNodePriority(right);
2378
2379 // Be really careful about hoisting call operands above previous calls.
2380 // Only allow it if it would reduce register pressure.
2381 if (left->isCall && right->isCallOp) {
2382 unsigned RNumVals = right->getNode()->getNumValues();
2383 RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
2384 }
2385 if (right->isCall && left->isCallOp) {
2386 unsigned LNumVals = left->getNode()->getNumValues();
2387 LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
2388 }
2389
2390 if (LPriority != RPriority)
2391 return LPriority > RPriority;
2392
2393 // If one or both of the nodes are calls and their Sethi-Ullman numbers are
2394 // the same, then keep source order.
2395 if (left->isCall || right->isCall) {
2396 unsigned LOrder = SPQ->getNodeOrdering(left);
2397 unsigned ROrder = SPQ->getNodeOrdering(right);
2398
2399 // Prefer an ordering where the lower the non-zero order number, the higher
2400 // the preference.
2401 if ((LOrder || ROrder) && LOrder != ROrder)
2402 return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
2403 }
2404
2405 // Try to schedule def + use closer when Sethi-Ullman numbers are the same.
2406 // e.g.
2407 // t1 = op t2, c1
2408 // t3 = op t4, c2
2409 //
2410 // and the following instructions are both ready.
2411 // t2 = op c3
2412 // t4 = op c4
2413 //
2414 // Then schedule t2 = op first.
2415 // i.e.
2416 // t4 = op c4
2417 // t2 = op c3
2418 // t1 = op t2, c1
2419 // t3 = op t4, c2
2420 //
2421 // This creates more short live intervals.
2422 unsigned LDist = closestSucc(left);
2423 unsigned RDist = closestSucc(right);
2424 if (LDist != RDist)
2425 return LDist < RDist;
2426
2427 // How many registers become live when the node is scheduled.
2428 unsigned LScratch = calcMaxScratches(left);
2429 unsigned RScratch = calcMaxScratches(right);
2430 if (LScratch != RScratch)
2431 return LScratch > RScratch;
2432
2433 // Comparing latency against a call makes little sense unless the node
2434 // is register pressure-neutral.
2435 if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
2436 return (left->NodeQueueId > right->NodeQueueId);
2437
2438 // Do not compare latencies when one or both of the nodes are calls.
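// When latency cannot be compared (calls) or cycle-level precision is
// disabled, fall back on height, then depth, below.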
2439 if (!DisableSchedCycles && 2440 !(left->isCall || right->isCall)) { 2441 int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); 2442 if (result != 0) 2443 return result > 0; 2444 } 2445 else { 2446 if (left->getHeight() != right->getHeight()) 2447 return left->getHeight() > right->getHeight(); 2448 2449 if (left->getDepth() != right->getDepth()) 2450 return left->getDepth() < right->getDepth(); 2451 } 2452 2453 assert(left->NodeQueueId && right->NodeQueueId && 2454 "NodeQueueId cannot be zero"); 2455 return (left->NodeQueueId > right->NodeQueueId); 2456 } 2457 2458 // Bottom up 2459 bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2460 if (int res = checkSpecialNodes(left, right)) 2461 return res > 0; 2462 2463 return BURRSort(left, right, SPQ); 2464 } 2465 2466 // Source order, otherwise bottom up. 2467 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2468 if (int res = checkSpecialNodes(left, right)) 2469 return res > 0; 2470 2471 unsigned LOrder = SPQ->getNodeOrdering(left); 2472 unsigned ROrder = SPQ->getNodeOrdering(right); 2473 2474 // Prefer an ordering where the lower the non-zero order number, the higher 2475 // the preference. 2476 if ((LOrder || ROrder) && LOrder != ROrder) 2477 return LOrder != 0 && (LOrder < ROrder || ROrder == 0); 2478 2479 return BURRSort(left, right, SPQ); 2480 } 2481 2482 // If the time between now and when the instruction will be ready can cover 2483 // the spill code, then avoid adding it to the ready queue. This gives long 2484 // stalls highest priority and allows hoisting across calls. It should also 2485 // speed up processing the available queue. 2486 bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2487 static const unsigned ReadyDelay = 3; 2488 2489 if (SPQ->MayReduceRegPressure(SU)) return true; 2490 2491 if (SU->getHeight() > (CurCycle + ReadyDelay)) return false; 2492 2493 if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay) 2494 != ScheduleHazardRecognizer::NoHazard) 2495 return false; 2496 2497 return true; 2498 } 2499 2500 // Return true if right should be scheduled with higher priority than left. 2501 bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2502 if (int res = checkSpecialNodes(left, right)) 2503 return res > 0; 2504 2505 if (left->isCall || right->isCall) 2506 // No way to compute latency of calls. 2507 return BURRSort(left, right, SPQ); 2508 2509 bool LHigh = SPQ->HighRegPressure(left); 2510 bool RHigh = SPQ->HighRegPressure(right); 2511 // Avoid causing spills. If register pressure is high, schedule for 2512 // register pressure reduction. 2513 if (LHigh && !RHigh) { 2514 DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU(" 2515 << right->NodeNum << ")\n"); 2516 return true; 2517 } 2518 else if (!LHigh && RHigh) { 2519 DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU(" 2520 << left->NodeNum << ")\n"); 2521 return false; 2522 } 2523 if (!LHigh && !RHigh) { 2524 int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ); 2525 if (result != 0) 2526 return result > 0; 2527 } 2528 return BURRSort(left, right, SPQ); 2529 } 2530 2531 // Schedule as many instructions in each cycle as possible. So don't make an 2532 // instruction available unless it is ready in the current cycle. 
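// I.e. the ready filter rejects nodes whose height is beyond the current
// cycle as well as nodes the hazard recognizer reports as stalled.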
2533 bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const { 2534 if (SU->getHeight() > CurCycle) return false; 2535 2536 if (SPQ->getHazardRec()->getHazardType(SU, 0) 2537 != ScheduleHazardRecognizer::NoHazard) 2538 return false; 2539 2540 return true; 2541 } 2542 2543 static bool canEnableCoalescing(SUnit *SU) { 2544 unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0; 2545 if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg) 2546 // CopyToReg should be close to its uses to facilitate coalescing and 2547 // avoid spilling. 2548 return true; 2549 2550 if (Opc == TargetOpcode::EXTRACT_SUBREG || 2551 Opc == TargetOpcode::SUBREG_TO_REG || 2552 Opc == TargetOpcode::INSERT_SUBREG) 2553 // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be 2554 // close to their uses to facilitate coalescing. 2555 return true; 2556 2557 if (SU->NumPreds == 0 && SU->NumSuccs != 0) 2558 // If SU does not have a register def, schedule it close to its uses 2559 // because it does not lengthen any live ranges. 2560 return true; 2561 2562 return false; 2563 } 2564 2565 // list-ilp is currently an experimental scheduler that allows various 2566 // heuristics to be enabled prior to the normal register reduction logic. 2567 bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { 2568 if (int res = checkSpecialNodes(left, right)) 2569 return res > 0; 2570 2571 if (left->isCall || right->isCall) 2572 // No way to compute latency of calls. 2573 return BURRSort(left, right, SPQ); 2574 2575 unsigned LLiveUses = 0, RLiveUses = 0; 2576 int LPDiff = 0, RPDiff = 0; 2577 if (!DisableSchedRegPressure || !DisableSchedLiveUses) { 2578 LPDiff = SPQ->RegPressureDiff(left, LLiveUses); 2579 RPDiff = SPQ->RegPressureDiff(right, RLiveUses); 2580 } 2581 if (!DisableSchedRegPressure && LPDiff != RPDiff) { 2582 DEBUG(dbgs() << "RegPressureDiff SU(" << left->NodeNum << "): " << LPDiff 2583 << " != SU(" << right->NodeNum << "): " << RPDiff << "\n"); 2584 return LPDiff > RPDiff; 2585 } 2586 2587 if (!DisableSchedRegPressure && (LPDiff > 0 || RPDiff > 0)) { 2588 bool LReduce = canEnableCoalescing(left); 2589 bool RReduce = canEnableCoalescing(right); 2590 if (LReduce && !RReduce) return false; 2591 if (RReduce && !LReduce) return true; 2592 } 2593 2594 if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) { 2595 DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses 2596 << " != SU(" << right->NodeNum << "): " << RLiveUses << "\n"); 2597 return LLiveUses < RLiveUses; 2598 } 2599 2600 if (!DisableSchedStalls) { 2601 bool LStall = BUHasStall(left, left->getHeight(), SPQ); 2602 bool RStall = BUHasStall(right, right->getHeight(), SPQ); 2603 if (LStall != RStall) 2604 return left->getHeight() > right->getHeight(); 2605 } 2606 2607 if (!DisableSchedCriticalPath) { 2608 int spread = (int)left->getDepth() - (int)right->getDepth(); 2609 if (std::abs(spread) > MaxReorderWindow) { 2610 DEBUG(dbgs() << "Depth of SU(" << left->NodeNum << "): " 2611 << left->getDepth() << " != SU(" << right->NodeNum << "): " 2612 << right->getDepth() << "\n"); 2613 return left->getDepth() < right->getDepth(); 2614 } 2615 } 2616 2617 if (!DisableSchedHeight && left->getHeight() != right->getHeight()) { 2618 int spread = (int)left->getHeight() - (int)right->getHeight(); 2619 if (std::abs(spread) > MaxReorderWindow) 2620 return left->getHeight() > right->getHeight(); 2621 } 2622 2623 return BURRSort(left, right, SPQ); 2624 } 2625 2626 void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { 2627 SUnits = 
&sunits;
2628 // Add pseudo dependency edges for two-address nodes.
2629 if (!Disable2AddrHack)
2630 AddPseudoTwoAddrDeps();
2631 // Reroute edges to nodes with multiple uses.
2632 if (!TracksRegPressure && !SrcOrder)
2633 PrescheduleNodesWithMultipleUses();
2634 // Calculate node priorities.
2635 CalculateSethiUllmanNumbers();
2636
2637 // For single block loops, mark nodes that look like canonical IV increments.
2638 if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
2639 for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
2640 initVRegCycle(&sunits[i]);
2641 }
2642 }
2643 }
2644
2645 //===----------------------------------------------------------------------===//
2646 // Preschedule for Register Pressure
2647 //===----------------------------------------------------------------------===//
2648
2649 bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
2650 if (SU->isTwoAddress) {
2651 unsigned Opc = SU->getNode()->getMachineOpcode();
2652 const MCInstrDesc &MCID = TII->get(Opc);
2653 unsigned NumRes = MCID.getNumDefs();
2654 unsigned NumOps = MCID.getNumOperands() - NumRes;
2655 for (unsigned i = 0; i != NumOps; ++i) {
2656 if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
2657 SDNode *DU = SU->getNode()->getOperand(i).getNode();
2658 if (DU->getNodeId() != -1 &&
2659 Op->OrigNode == &(*SUnits)[DU->getNodeId()])
2660 return true;
2661 }
2662 }
2663 }
2664 return false;
2665 }
2666
2667 /// canClobberReachingPhysRegUse - True if SU would clobber one of its
2668 /// successor's explicit physregs whose definition can reach DepSU.
2669 /// i.e. DepSU should not be scheduled above SU.
2670 static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU,
2671 ScheduleDAGRRList *scheduleDAG,
2672 const TargetInstrInfo *TII,
2673 const TargetRegisterInfo *TRI) {
2674 const uint16_t *ImpDefs
2675 = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs();
2676 const uint32_t *RegMask = getNodeRegMask(SU->getNode());
2677 if (!ImpDefs && !RegMask)
2678 return false;
2679
2680 for (SUnit::const_succ_iterator SI = SU->Succs.begin(), SE = SU->Succs.end();
2681 SI != SE; ++SI) {
2682 SUnit *SuccSU = SI->getSUnit();
2683 for (SUnit::const_pred_iterator PI = SuccSU->Preds.begin(),
2684 PE = SuccSU->Preds.end(); PI != PE; ++PI) {
2685 if (!PI->isAssignedRegDep())
2686 continue;
2687
2688 if (RegMask && MachineOperand::clobbersPhysReg(RegMask, PI->getReg()) &&
2689 scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2690 return true;
2691
2692 if (ImpDefs)
2693 for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef)
2694 // Return true if SU clobbers this physical register use and the
2695 // definition of the register reaches from DepSU. IsReachable queries
2696 // a topological forward sort of the DAG (following the successors).
2697 if (TRI->regsOverlap(*ImpDef, PI->getReg()) &&
2698 scheduleDAG->IsReachable(DepSU, PI->getSUnit()))
2699 return true;
2700 }
2701 }
2702 return false;
2703 }
2704
2705 /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
2706 /// physical register defs.
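/// Walks SU's chain of glued nodes and checks each node's implicit defs and
/// register mask against SuccSU's used physical register results.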
2707 static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, 2708 const TargetInstrInfo *TII, 2709 const TargetRegisterInfo *TRI) { 2710 SDNode *N = SuccSU->getNode(); 2711 unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); 2712 const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); 2713 assert(ImpDefs && "Caller should check hasPhysRegDefs"); 2714 for (const SDNode *SUNode = SU->getNode(); SUNode; 2715 SUNode = SUNode->getGluedNode()) { 2716 if (!SUNode->isMachineOpcode()) 2717 continue; 2718 const uint16_t *SUImpDefs = 2719 TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); 2720 const uint32_t *SURegMask = getNodeRegMask(SUNode); 2721 if (!SUImpDefs && !SURegMask) 2722 continue; 2723 for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { 2724 EVT VT = N->getValueType(i); 2725 if (VT == MVT::Glue || VT == MVT::Other) 2726 continue; 2727 if (!N->hasAnyUseOfValue(i)) 2728 continue; 2729 unsigned Reg = ImpDefs[i - NumDefs]; 2730 if (SURegMask && MachineOperand::clobbersPhysReg(SURegMask, Reg)) 2731 return true; 2732 if (!SUImpDefs) 2733 continue; 2734 for (;*SUImpDefs; ++SUImpDefs) { 2735 unsigned SUReg = *SUImpDefs; 2736 if (TRI->regsOverlap(Reg, SUReg)) 2737 return true; 2738 } 2739 } 2740 } 2741 return false; 2742 } 2743 2744 /// PrescheduleNodesWithMultipleUses - Nodes with multiple uses 2745 /// are not handled well by the general register pressure reduction 2746 /// heuristics. When presented with code like this: 2747 /// 2748 /// N 2749 /// / | 2750 /// / | 2751 /// U store 2752 /// | 2753 /// ... 2754 /// 2755 /// the heuristics tend to push the store up, but since the 2756 /// operand of the store has another use (U), this would increase 2757 /// the length of that other use (the U->N edge). 2758 /// 2759 /// This function transforms code like the above to route U's 2760 /// dependence through the store when possible, like this: 2761 /// 2762 /// N 2763 /// || 2764 /// || 2765 /// store 2766 /// | 2767 /// U 2768 /// | 2769 /// ... 2770 /// 2771 /// This results in the store being scheduled immediately 2772 /// after N, which shortens the U->N live range, reducing 2773 /// register pressure. 2774 /// 2775 void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { 2776 // Visit all the nodes in topological order, working top-down. 2777 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { 2778 SUnit *SU = &(*SUnits)[i]; 2779 // For now, only look at nodes with no data successors, such as stores. 2780 // These are especially important, due to the heuristics in 2781 // getNodePriority for nodes with no data successors. 2782 if (SU->NumSuccs != 0) 2783 continue; 2784 // For now, only look at nodes with exactly one data predecessor. 2785 if (SU->NumPreds != 1) 2786 continue; 2787 // Avoid prescheduling copies to virtual registers, which don't behave 2788 // like other nodes from the perspective of scheduling heuristics. 2789 if (SDNode *N = SU->getNode()) 2790 if (N->getOpcode() == ISD::CopyToReg && 2791 TargetRegisterInfo::isVirtualRegister 2792 (cast<RegisterSDNode>(N->getOperand(1))->getReg())) 2793 continue; 2794 2795 // Locate the single data predecessor. 2796 SUnit *PredSU = 0; 2797 for (SUnit::const_pred_iterator II = SU->Preds.begin(), 2798 EE = SU->Preds.end(); II != EE; ++II) 2799 if (!II->isCtrl()) { 2800 PredSU = II->getSUnit(); 2801 break; 2802 } 2803 assert(PredSU); 2804 2805 // Don't rewrite edges that carry physregs, because that requires additional 2806 // support infrastructure. 
2807 if (PredSU->hasPhysRegDefs)
2808 continue;
2809 // Short-circuit the case where SU is PredSU's only data successor.
2810 if (PredSU->NumSuccs == 1)
2811 continue;
2812 // Avoid prescheduling to copies from virtual registers, which don't behave
2813 // like other nodes from the perspective of scheduling heuristics.
2814 if (SDNode *N = SU->getNode())
2815 if (N->getOpcode() == ISD::CopyFromReg &&
2816 TargetRegisterInfo::isVirtualRegister
2817 (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
2818 continue;
2819
2820 // Perform checks on the successors of PredSU.
2821 for (SUnit::const_succ_iterator II = PredSU->Succs.begin(),
2822 EE = PredSU->Succs.end(); II != EE; ++II) {
2823 SUnit *PredSuccSU = II->getSUnit();
2824 if (PredSuccSU == SU) continue;
2825 // If PredSU has another successor with no data successors, for
2826 // now don't attempt to choose either over the other.
2827 if (PredSuccSU->NumSuccs == 0)
2828 goto outer_loop_continue;
2829 // Don't break physical register dependencies.
2830 if (SU->hasPhysRegClobbers && PredSuccSU->hasPhysRegDefs)
2831 if (canClobberPhysRegDefs(PredSuccSU, SU, TII, TRI))
2832 goto outer_loop_continue;
2833 // Don't introduce graph cycles.
2834 if (scheduleDAG->IsReachable(SU, PredSuccSU))
2835 goto outer_loop_continue;
2836 }
2837
2838 // Ok, the transformation is safe and the heuristics suggest it is
2839 // profitable. Update the graph.
2840 DEBUG(dbgs() << "    Prescheduling SU #" << SU->NodeNum
2841 << " next to PredSU #" << PredSU->NodeNum
2842 << " to guide scheduling in the presence of multiple uses\n");
2843 for (unsigned i = 0; i != PredSU->Succs.size(); ++i) {
2844 SDep Edge = PredSU->Succs[i];
2845 assert(!Edge.isAssignedRegDep());
2846 SUnit *SuccSU = Edge.getSUnit();
2847 if (SuccSU != SU) {
2848 Edge.setSUnit(PredSU);
2849 scheduleDAG->RemovePred(SuccSU, Edge);
2850 scheduleDAG->AddPred(SU, Edge);
2851 Edge.setSUnit(SU);
2852 scheduleDAG->AddPred(SuccSU, Edge);
2853 --i;
2854 }
2855 }
2856 outer_loop_continue:;
2857 }
2858 }
2859
2860 /// AddPseudoTwoAddrDeps - If two nodes share an operand and one of them uses
2861 /// it as a def&use operand, add a pseudo control edge from it to the other
2862 /// node (if it won't create a cycle) so the two-address one will be scheduled
2863 /// first (lower in the schedule). If both nodes are two-address, favor the
2864 /// one that has a CopyToReg use (more likely to be a loop induction update).
2865 /// If both are two-address, but one is commutable while the other is not
2866 /// commutable, favor the one that's not commutable.
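///
/// Illustrative sketch (hypothetical two-address instruction, not code from
/// this file):
///   a1 = ADD2ADDR a0, b   ; first operand tied to the result
///   c  = SUB x, a0        ; independent use of the old value of a0
/// The artificial edge makes the SUB a predecessor of the ADD2ADDR, so the
/// SUB is emitted above the tied redefinition; otherwise the register
/// allocator would need a copy to preserve a0 for the SUB.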
2867 void RegReductionPQBase::AddPseudoTwoAddrDeps() {
2868 for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
2869 SUnit *SU = &(*SUnits)[i];
2870 if (!SU->isTwoAddress)
2871 continue;
2872
2873 SDNode *Node = SU->getNode();
2874 if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
2875 continue;
2876
2877 bool isLiveOut = hasOnlyLiveOutUses(SU);
2878 unsigned Opc = Node->getMachineOpcode();
2879 const MCInstrDesc &MCID = TII->get(Opc);
2880 unsigned NumRes = MCID.getNumDefs();
2881 unsigned NumOps = MCID.getNumOperands() - NumRes;
2882 for (unsigned j = 0; j != NumOps; ++j) {
2883 if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
2884 continue;
2885 SDNode *DU = SU->getNode()->getOperand(j).getNode();
2886 if (DU->getNodeId() == -1)
2887 continue;
2888 const SUnit *DUSU = &(*SUnits)[DU->getNodeId()];
2889 if (!DUSU) continue;
2890 for (SUnit::const_succ_iterator I = DUSU->Succs.begin(),
2891 E = DUSU->Succs.end(); I != E; ++I) {
2892 if (I->isCtrl()) continue;
2893 SUnit *SuccSU = I->getSUnit();
2894 if (SuccSU == SU)
2895 continue;
2896 // Be conservative. Ignore if nodes aren't at roughly the same
2897 // depth and height.
2898 if (SuccSU->getHeight() < SU->getHeight() &&
2899 (SU->getHeight() - SuccSU->getHeight()) > 1)
2900 continue;
2901 // Skip past COPY_TO_REGCLASS nodes, so that the pseudo edge
2902 // constrains whatever is using the copy, instead of the copy
2903 // itself. In the case that the copy is coalesced, this
2904 // preserves the intent of the pseudo two-address heuristics.
2905 while (SuccSU->Succs.size() == 1 &&
2906 SuccSU->getNode()->isMachineOpcode() &&
2907 SuccSU->getNode()->getMachineOpcode() ==
2908 TargetOpcode::COPY_TO_REGCLASS)
2909 SuccSU = SuccSU->Succs.front().getSUnit();
2910 // Don't constrain non-instruction nodes.
2911 if (!SuccSU->getNode() || !SuccSU->getNode()->isMachineOpcode())
2912 continue;
2913 // Don't constrain nodes with physical register defs if the
2914 // predecessor can clobber them.
2915 if (SuccSU->hasPhysRegDefs && SU->hasPhysRegClobbers) {
2916 if (canClobberPhysRegDefs(SuccSU, SU, TII, TRI))
2917 continue;
2918 }
2919 // Don't constrain EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG;
2920 // these may be coalesced away. We want them close to their uses.
2921 unsigned SuccOpc = SuccSU->getNode()->getMachineOpcode(); 2922 if (SuccOpc == TargetOpcode::EXTRACT_SUBREG || 2923 SuccOpc == TargetOpcode::INSERT_SUBREG || 2924 SuccOpc == TargetOpcode::SUBREG_TO_REG) 2925 continue; 2926 if (!canClobberReachingPhysRegUse(SuccSU, SU, scheduleDAG, TII, TRI) && 2927 (!canClobber(SuccSU, DUSU) || 2928 (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || 2929 (!SU->isCommutable && SuccSU->isCommutable)) && 2930 !scheduleDAG->IsReachable(SuccSU, SU)) { 2931 DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #" 2932 << SU->NodeNum << " to SU #" << SuccSU->NodeNum << "\n"); 2933 scheduleDAG->AddPred(SU, SDep(SuccSU, SDep::Order, /*Latency=*/0, 2934 /*Reg=*/0, /*isNormalMemory=*/false, 2935 /*isMustAlias=*/false, 2936 /*isArtificial=*/true)); 2937 } 2938 } 2939 } 2940 } 2941 } 2942 2943 //===----------------------------------------------------------------------===// 2944 // Public Constructor Functions 2945 //===----------------------------------------------------------------------===// 2946 2947 llvm::ScheduleDAGSDNodes * 2948 llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, 2949 CodeGenOpt::Level OptLevel) { 2950 const TargetMachine &TM = IS->TM; 2951 const TargetInstrInfo *TII = TM.getInstrInfo(); 2952 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2953 2954 BURegReductionPriorityQueue *PQ = 2955 new BURegReductionPriorityQueue(*IS->MF, false, false, TII, TRI, 0); 2956 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2957 PQ->setScheduleDAG(SD); 2958 return SD; 2959 } 2960 2961 llvm::ScheduleDAGSDNodes * 2962 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, 2963 CodeGenOpt::Level OptLevel) { 2964 const TargetMachine &TM = IS->TM; 2965 const TargetInstrInfo *TII = TM.getInstrInfo(); 2966 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2967 2968 SrcRegReductionPriorityQueue *PQ = 2969 new SrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, 0); 2970 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); 2971 PQ->setScheduleDAG(SD); 2972 return SD; 2973 } 2974 2975 llvm::ScheduleDAGSDNodes * 2976 llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, 2977 CodeGenOpt::Level OptLevel) { 2978 const TargetMachine &TM = IS->TM; 2979 const TargetInstrInfo *TII = TM.getInstrInfo(); 2980 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2981 const TargetLowering *TLI = &IS->getTargetLowering(); 2982 2983 HybridBURRPriorityQueue *PQ = 2984 new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 2985 2986 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 2987 PQ->setScheduleDAG(SD); 2988 return SD; 2989 } 2990 2991 llvm::ScheduleDAGSDNodes * 2992 llvm::createILPListDAGScheduler(SelectionDAGISel *IS, 2993 CodeGenOpt::Level OptLevel) { 2994 const TargetMachine &TM = IS->TM; 2995 const TargetInstrInfo *TII = TM.getInstrInfo(); 2996 const TargetRegisterInfo *TRI = TM.getRegisterInfo(); 2997 const TargetLowering *TLI = &IS->getTargetLowering(); 2998 2999 ILPBURRPriorityQueue *PQ = 3000 new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); 3001 ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel); 3002 PQ->setScheduleDAG(SD); 3003 return SD; 3004 } 3005
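// Usage note (illustrative): these factories are normally reached through
// the RegisterScheduler machinery, e.g. llc -pre-RA-sched=list-burr,
// -pre-RA-sched=source, -pre-RA-sched=list-hybrid, or -pre-RA-sched=list-ilp,
// or when the default scheduler consults the target's scheduling preference.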