//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the ScheduleDAGInstrs class, which implements re-scheduling
// of MachineInstrs.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "misched"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDFS.h"
#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

static cl::opt<bool> EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("Enable use of AA during MI DAG construction"));

ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                     const MachineLoopInfo &mli,
                                     const MachineDominatorTree &mdt,
                                     bool IsPostRAFlag,
                                     LiveIntervals *lis)
  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), LIS(lis),
    IsPostRA(IsPostRAFlag), CanHandleTerminators(false), FirstDbgValue(0) {
  assert((IsPostRA || LIS) && "PreRA scheduling requires LiveIntervals");
  DbgValues.clear();
  assert(!(IsPostRA && MRI.getNumVirtRegs()) &&
         "Virtual registers must be removed prior to PostRA scheduling");

  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
  SchedModel.init(*ST.getSchedModel(), &ST, TII);
}

/// getUnderlyingObjectFromInt - This is the function that does the work of
/// looking through basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObjectFromInt(const Value *V) {
  do {
    if (const Operator *U = dyn_cast<Operator>(V)) {
      // If we find a ptrtoint, we can transfer control back to the
      // regular getUnderlyingObjectFromInt.
      if (U->getOpcode() == Instruction::PtrToInt)
        return U->getOperand(0);
      // If we find an add of a constant, a multiplied value, or a phi, it's
      // likely that the other operand will lead us to the base
      // object. We don't have to worry about the case where the
      // object address is somehow being computed by the multiply,
      // because our callers only care when the result is an
      // identifiable object.
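      // Illustrative example (not from the original comments): for IR such as
      //   %i = ptrtoint i8* %p to i64
      //   %j = add i64 %i, 8
      //   %q = inttoptr i64 %j to i8*
      // the wrapper below hands %j to this function; the add is stepped
      // through to %i, and the ptrtoint case above returns %p to the caller.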
76 if (U->getOpcode() != Instruction::Add || 77 (!isa<ConstantInt>(U->getOperand(1)) && 78 Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && 79 !isa<PHINode>(U->getOperand(1)))) 80 return V; 81 V = U->getOperand(0); 82 } else { 83 return V; 84 } 85 assert(V->getType()->isIntegerTy() && "Unexpected operand type!"); 86 } while (1); 87 } 88 89 /// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects 90 /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. 91 static void getUnderlyingObjects(const Value *V, 92 SmallVectorImpl<Value *> &Objects) { 93 SmallPtrSet<const Value*, 16> Visited; 94 SmallVector<const Value *, 4> Working(1, V); 95 do { 96 V = Working.pop_back_val(); 97 98 SmallVector<Value *, 4> Objs; 99 GetUnderlyingObjects(const_cast<Value *>(V), Objs); 100 101 for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); 102 I != IE; ++I) { 103 V = *I; 104 if (!Visited.insert(V)) 105 continue; 106 if (Operator::getOpcode(V) == Instruction::IntToPtr) { 107 const Value *O = 108 getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0)); 109 if (O->getType()->isPointerTy()) { 110 Working.push_back(O); 111 continue; 112 } 113 } 114 Objects.push_back(const_cast<Value *>(V)); 115 } 116 } while (!Working.empty()); 117 } 118 119 typedef SmallVector<PointerIntPair<const Value *, 1, bool>, 4> 120 UnderlyingObjectsVector; 121 122 /// getUnderlyingObjectsForInstr - If this machine instr has memory reference 123 /// information and it can be tracked to a normal reference to a known 124 /// object, return the Value for that object. 125 static void getUnderlyingObjectsForInstr(const MachineInstr *MI, 126 const MachineFrameInfo *MFI, 127 UnderlyingObjectsVector &Objects) { 128 if (!MI->hasOneMemOperand() || 129 !(*MI->memoperands_begin())->getValue() || 130 (*MI->memoperands_begin())->isVolatile()) 131 return; 132 133 const Value *V = (*MI->memoperands_begin())->getValue(); 134 if (!V) 135 return; 136 137 SmallVector<Value *, 4> Objs; 138 getUnderlyingObjects(V, Objs); 139 140 for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end(); 141 I != IE; ++I) { 142 bool MayAlias = true; 143 V = *I; 144 145 if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) { 146 // For now, ignore PseudoSourceValues which may alias LLVM IR values 147 // because the code that uses this function has no way to cope with 148 // such aliases. 149 150 if (PSV->isAliased(MFI)) { 151 Objects.clear(); 152 return; 153 } 154 155 MayAlias = PSV->mayAlias(MFI); 156 } else if (!isIdentifiedObject(V)) { 157 Objects.clear(); 158 return; 159 } 160 161 Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); 162 } 163 } 164 165 void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { 166 BB = bb; 167 } 168 169 void ScheduleDAGInstrs::finishBlock() { 170 // Subclasses should no longer refer to the old block. 171 BB = 0; 172 } 173 174 /// Initialize the DAG and common scheduler state for the current scheduling 175 /// region. This does not actually create the DAG, only clears it. The 176 /// scheduling driver may call BuildSchedGraph multiple times per scheduling 177 /// region. 
void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
                                    MachineBasicBlock::iterator begin,
                                    MachineBasicBlock::iterator end,
                                    unsigned endcount) {
  assert(bb == BB && "startBlock should set BB");
  RegionBegin = begin;
  RegionEnd = end;
  EndIndex = endcount;
  MISUnitMap.clear();

  ScheduleDAG::clearDAG();
}

/// Close the current scheduling region. Don't clear any state in case the
/// driver wants to refer to the previous scheduling region.
void ScheduleDAGInstrs::exitRegion() {
  // Nothing to do.
}

/// addSchedBarrierDeps - Add dependencies from instructions in the current
/// list of instructions being scheduled to the scheduling barrier by adding
/// the exit SU to the register defs and use list. This is because we want to
/// make sure instructions which define registers that are either used by
/// the terminator or are live-out are properly scheduled. This is
/// especially important when the definition latency of the return value(s)
/// is too high to be hidden by the branch or when the liveout registers
/// are used by instructions in the fallthrough block.
void ScheduleDAGInstrs::addSchedBarrierDeps() {
  MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : 0;
  ExitSU.setInstr(ExitMI);
  bool AllDepKnown = ExitMI &&
    (ExitMI->isCall() || ExitMI->isBarrier());
  if (ExitMI && AllDepKnown) {
    // If it's a call or a barrier, add dependencies on the defs and uses of
    // the instruction.
    for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = ExitMI->getOperand(i);
      if (!MO.isReg() || MO.isDef()) continue;
      unsigned Reg = MO.getReg();
      if (Reg == 0) continue;

      if (TRI->isPhysicalRegister(Reg))
        Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      else {
        assert(!IsPostRA && "Virtual register encountered after regalloc.");
        if (MO.readsReg()) // ignore undef operands
          addVRegUseDeps(&ExitSU, i);
      }
    }
  } else {
    // For others, e.g. fallthrough, conditional branch, assume the exit
    // uses all the registers that are livein to the successor blocks.
    assert(Uses.empty() && "Uses in set before adding deps?");
    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
           SE = BB->succ_end(); SI != SE; ++SI)
      for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
             E = (*SI)->livein_end(); I != E; ++I) {
        unsigned Reg = *I;
        if (!Uses.contains(Reg))
          Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
      }
  }
}

/// MO is an operand of SU's instruction that defines a physical register. Add
/// data dependencies from SU to any uses of the physical register.
void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
  const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
  assert(MO.isDef() && "expect physreg def");

  // Ask the target if address-backscheduling is desirable, and if so how much.
  const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();

  for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
       Alias.isValid(); ++Alias) {
    if (!Uses.contains(*Alias))
      continue;
    for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
      SUnit *UseSU = I->SU;
      if (UseSU == SU)
        continue;

      // Adjust the dependence latency using operand def/use information,
      // then allow the target to perform its own adjustments.
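      // (Note added for clarity: a negative OpIdx in the Uses map marks the
      // artificial ExitSU entry created by addSchedBarrierDeps, so in that
      // case an Artificial edge is built below instead of a Data edge.)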
262 int UseOp = I->OpIdx; 263 MachineInstr *RegUse = 0; 264 SDep Dep; 265 if (UseOp < 0) 266 Dep = SDep(SU, SDep::Artificial); 267 else { 268 // Set the hasPhysRegDefs only for physreg defs that have a use within 269 // the scheduling region. 270 SU->hasPhysRegDefs = true; 271 Dep = SDep(SU, SDep::Data, *Alias); 272 RegUse = UseSU->getInstr(); 273 } 274 Dep.setLatency( 275 SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, 276 UseOp)); 277 278 ST.adjustSchedDependency(SU, UseSU, Dep); 279 UseSU->addPred(Dep); 280 } 281 } 282 } 283 284 /// addPhysRegDeps - Add register dependencies (data, anti, and output) from 285 /// this SUnit to following instructions in the same scheduling region that 286 /// depend the physical register referenced at OperIdx. 287 void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { 288 const MachineInstr *MI = SU->getInstr(); 289 const MachineOperand &MO = MI->getOperand(OperIdx); 290 291 // Optionally add output and anti dependencies. For anti 292 // dependencies we use a latency of 0 because for a multi-issue 293 // target we want to allow the defining instruction to issue 294 // in the same cycle as the using instruction. 295 // TODO: Using a latency of 1 here for output dependencies assumes 296 // there's no cost for reusing registers. 297 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output; 298 for (MCRegAliasIterator Alias(MO.getReg(), TRI, true); 299 Alias.isValid(); ++Alias) { 300 if (!Defs.contains(*Alias)) 301 continue; 302 for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { 303 SUnit *DefSU = I->SU; 304 if (DefSU == &ExitSU) 305 continue; 306 if (DefSU != SU && 307 (Kind != SDep::Output || !MO.isDead() || 308 !DefSU->getInstr()->registerDefIsDead(*Alias))) { 309 if (Kind == SDep::Anti) 310 DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); 311 else { 312 SDep Dep(SU, Kind, /*Reg=*/*Alias); 313 Dep.setLatency( 314 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); 315 DefSU->addPred(Dep); 316 } 317 } 318 } 319 } 320 321 if (!MO.isDef()) { 322 SU->hasPhysRegUses = true; 323 // Either insert a new Reg2SUnits entry with an empty SUnits list, or 324 // retrieve the existing SUnits list for this register's uses. 325 // Push this SUnit on the use list. 326 Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); 327 } 328 else { 329 addPhysRegDataDeps(SU, OperIdx); 330 unsigned Reg = MO.getReg(); 331 332 // clear this register's use list 333 if (Uses.contains(Reg)) 334 Uses.eraseAll(Reg); 335 336 if (!MO.isDead()) { 337 Defs.eraseAll(Reg); 338 } else if (SU->isCall) { 339 // Calls will not be reordered because of chain dependencies (see 340 // below). Since call operands are dead, calls may continue to be added 341 // to the DefList making dependence checking quadratic in the size of 342 // the block. Instead, we leave only one call at the back of the 343 // DefList. 344 Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); 345 Reg2SUnitsMap::iterator B = P.first; 346 Reg2SUnitsMap::iterator I = P.second; 347 for (bool isBegin = I == B; !isBegin; /* empty */) { 348 isBegin = (--I) == B; 349 if (!I->SU->isCall) 350 break; 351 I = Defs.erase(I); 352 } 353 } 354 355 // Defs are pushed in the order they are visited and never reordered. 
356 Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); 357 } 358 } 359 360 /// addVRegDefDeps - Add register output and data dependencies from this SUnit 361 /// to instructions that occur later in the same scheduling region if they read 362 /// from or write to the virtual register defined at OperIdx. 363 /// 364 /// TODO: Hoist loop induction variable increments. This has to be 365 /// reevaluated. Generally, IV scheduling should be done before coalescing. 366 void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { 367 const MachineInstr *MI = SU->getInstr(); 368 unsigned Reg = MI->getOperand(OperIdx).getReg(); 369 370 // Singly defined vregs do not have output/anti dependencies. 371 // The current operand is a def, so we have at least one. 372 // Check here if there are any others... 373 if (MRI.hasOneDef(Reg)) 374 return; 375 376 // Add output dependence to the next nearest def of this vreg. 377 // 378 // Unless this definition is dead, the output dependence should be 379 // transitively redundant with antidependencies from this definition's 380 // uses. We're conservative for now until we have a way to guarantee the uses 381 // are not eliminated sometime during scheduling. The output dependence edge 382 // is also useful if output latency exceeds def-use latency. 383 VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); 384 if (DefI == VRegDefs.end()) 385 VRegDefs.insert(VReg2SUnit(Reg, SU)); 386 else { 387 SUnit *DefSU = DefI->SU; 388 if (DefSU != SU && DefSU != &ExitSU) { 389 SDep Dep(SU, SDep::Output, Reg); 390 Dep.setLatency( 391 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); 392 DefSU->addPred(Dep); 393 } 394 DefI->SU = SU; 395 } 396 } 397 398 /// addVRegUseDeps - Add a register data dependency if the instruction that 399 /// defines the virtual register used at OperIdx is mapped to an SUnit. Add a 400 /// register antidependency from this SUnit to instructions that occur later in 401 /// the same scheduling region if they write the virtual register. 402 /// 403 /// TODO: Handle ExitSU "uses" properly. 404 void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { 405 MachineInstr *MI = SU->getInstr(); 406 unsigned Reg = MI->getOperand(OperIdx).getReg(); 407 408 // Lookup this operand's reaching definition. 409 assert(LIS && "vreg dependencies requires LiveIntervals"); 410 LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); 411 VNInfo *VNI = LRQ.valueIn(); 412 413 // VNI will be valid because MachineOperand::readsReg() is checked by caller. 414 assert(VNI && "No value to read by operand"); 415 MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); 416 // Phis and other noninstructions (after coalescing) have a NULL Def. 417 if (Def) { 418 SUnit *DefSU = getSUnit(Def); 419 if (DefSU) { 420 // The reaching Def lives within this scheduling region. 421 // Create a data dependence. 422 SDep dep(DefSU, SDep::Data, Reg); 423 // Adjust the dependence latency using operand def/use information, then 424 // allow the target to perform its own adjustments. 425 int DefOp = Def->findRegisterDefOperandIdx(Reg); 426 dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); 427 428 const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>(); 429 ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep)); 430 SU->addPred(dep); 431 } 432 } 433 434 // Add antidependence to the following def of the vreg it uses. 
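  // For example (illustrative):
  //   ... = use %vreg1      <-- this SU
  //   %vreg1<def> = ...     <-- a later def of %vreg1 in the same region
  // The Anti edge added below keeps the use scheduled before that
  // redefinition.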
  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
  if (DefI != VRegDefs.end() && DefI->SU != SU)
    DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
}

/// Return true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
  if (MI->isCall() || MI->hasUnmodeledSideEffects() ||
      (MI->hasOrderedMemoryRef() &&
       (!MI->mayLoad() || !MI->isInvariantLoad(AA))))
    return true;
  return false;
}

// This MI might either have incomplete info or be known to be unsafe
// to deal with (i.e. a volatile object).
static inline bool isUnsafeMemoryObject(MachineInstr *MI,
                                        const MachineFrameInfo *MFI) {
  if (!MI || MI->memoperands_empty())
    return true;
  // We purposefully do not check for hasOneMemOperand() here
  // in the hope of triggering an assert downstream in order to
  // finish the implementation.
  if ((*MI->memoperands_begin())->isVolatile() ||
      MI->hasUnmodeledSideEffects())
    return true;
  const Value *V = (*MI->memoperands_begin())->getValue();
  if (!V)
    return true;

  SmallVector<Value *, 4> Objs;
  getUnderlyingObjects(V, Objs);
  for (SmallVectorImpl<Value *>::iterator I = Objs.begin(),
         IE = Objs.end(); I != IE; ++I) {
    V = *I;

    if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
      // Similarly to getUnderlyingObjectsForInstr:
      // For now, ignore PseudoSourceValues which may alias LLVM IR values
      // because the code that uses this function has no way to cope with
      // such aliases.
      if (PSV->isAliased(MFI))
        return true;
    }

    // Does this pointer refer to a distinct and identifiable object?
    if (!isIdentifiedObject(V))
      return true;
  }

  return false;
}

/// This returns true if the two MIs need a chain edge between them.
/// Even if these are not memory operations, we may still need
/// chain deps between them. The question really is: could these
/// two MIs be reordered during scheduling from a memory dependency
/// point of view?
static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                             MachineInstr *MIa,
                             MachineInstr *MIb) {
  // Cover a trivial case - no edge is needed from an instruction to itself.
  if (MIa == MIb)
    return false;

  if (isUnsafeMemoryObject(MIa, MFI) || isUnsafeMemoryObject(MIb, MFI))
    return true;

  // If we are dealing with two "normal" loads, we do not need an edge
  // between them - they could be reordered.
  if (!MIa->mayStore() && !MIb->mayStore())
    return false;

  // To this point the analysis is generic. From here on we do need AA.
  if (!AA)
    return true;

  MachineMemOperand *MMOa = *MIa->memoperands_begin();
  MachineMemOperand *MMOb = *MIb->memoperands_begin();

  // FIXME: Need to handle multiple memory operands to support all targets.
  if (!MIa->hasOneMemOperand() || !MIb->hasOneMemOperand())
    llvm_unreachable("Multiple memory operands.");

  // The following interface to AA is fashioned after DAGCombiner::isAlias
  // and operates with MachineMemOperand offset with some important
  // assumptions:
  //   - LLVM fundamentally assumes flat address spaces.
  //   - MachineOperand offset can *only* result from legalization and
  //     cannot affect queries other than the trivial case of overlap
  //     checking.
  //   - These offsets never wrap and never step outside
  //     of allocated objects.
  //   - There should never be any negative offsets here.
  //
  // FIXME: Modify API to hide this math from "user"
  // FIXME: Even before we go to AA we can reason locally about some
  // memory objects. It can save compile time, and possibly catch some
  // corner cases not currently covered.

  assert((MMOa->getOffset() >= 0) && "Negative MachineMemOperand offset");
  assert((MMOb->getOffset() >= 0) && "Negative MachineMemOperand offset");

  int64_t MinOffset = std::min(MMOa->getOffset(), MMOb->getOffset());
  int64_t Overlapa = MMOa->getSize() + MMOa->getOffset() - MinOffset;
  int64_t Overlapb = MMOb->getSize() + MMOb->getOffset() - MinOffset;

  AliasAnalysis::AliasResult AAResult = AA->alias(
      AliasAnalysis::Location(MMOa->getValue(), Overlapa,
                              MMOa->getTBAAInfo()),
      AliasAnalysis::Location(MMOb->getValue(), Overlapb,
                              MMOb->getTBAAInfo()));

  return (AAResult != AliasAnalysis::NoAlias);
}

/// This recursive function iterates over chain deps of SUb looking for
/// the "latest" node that needs a chain edge to SUa.
static unsigned
iterateChainSucc(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                 SUnit *SUa, SUnit *SUb, SUnit *ExitSU, unsigned *Depth,
                 SmallPtrSet<const SUnit*, 16> &Visited) {
  if (!SUa || !SUb || SUb == ExitSU)
    return *Depth;

  // Remember visited nodes.
  if (!Visited.insert(SUb))
    return *Depth;
  // If there is _some_ dependency already in place, do not
  // descend any further.
  // TODO: Need to make sure that if that dependency got eliminated or ignored
  // for any reason in the future, we would not violate DAG topology.
  // Currently it does not happen, but makes an implicit assumption about
  // future implementation.
  //
  // Independently, if we encounter a node that is some sort of global
  // object (like a call) we already have a full set of dependencies to it
  // and we can stop descending.
  if (SUa->isSucc(SUb) ||
      isGlobalMemoryObject(AA, SUb->getInstr()))
    return *Depth;

  // If we do need an edge, or we have exceeded the depth budget,
  // add that edge to the predecessor chain of SUb,
  // and stop descending.
  if (*Depth > 200 ||
      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
    SUb->addPred(SDep(SUa, SDep::MayAliasMem));
    return *Depth;
  }
  // Track current depth.
  (*Depth)++;
  // Iterate over chain dependencies only.
  for (SUnit::const_succ_iterator I = SUb->Succs.begin(), E = SUb->Succs.end();
       I != E; ++I)
    if (I->isCtrl())
      iterateChainSucc(AA, MFI, SUa, I->getSUnit(), ExitSU, Depth, Visited);
  return *Depth;
}

/// This function assumes that "downward" from SU there exists a
/// tail/leaf of the already constructed DAG. It iterates downward and
/// checks whether SU can alias any node dominated by it.
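/// For illustration (not part of the original description): the CheckList
/// passed in by buildSchedGraph is RejectMemNodes, the set of SUnits for
/// which addChainDependency decided an edge was unnecessary; each of them is
/// re-tested against SU here and given a MayAliasMem edge if MIsNeedChainEdge
/// now requires ordering.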
static void adjustChainDeps(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                            SUnit *SU, SUnit *ExitSU,
                            std::set<SUnit *> &CheckList,
                            unsigned LatencyToLoad) {
  if (!SU)
    return;

  SmallPtrSet<const SUnit*, 16> Visited;
  unsigned Depth = 0;

  for (std::set<SUnit *>::iterator I = CheckList.begin(), IE = CheckList.end();
       I != IE; ++I) {
    if (SU == *I)
      continue;
    if (MIsNeedChainEdge(AA, MFI, SU->getInstr(), (*I)->getInstr())) {
      SDep Dep(SU, SDep::MayAliasMem);
      Dep.setLatency(((*I)->getInstr()->mayLoad()) ? LatencyToLoad : 0);
      (*I)->addPred(Dep);
    }
    // Now go through all the chain successors and iterate from them.
    // Keep track of visited nodes.
    for (SUnit::const_succ_iterator J = (*I)->Succs.begin(),
           JE = (*I)->Succs.end(); J != JE; ++J)
      if (J->isCtrl())
        iterateChainSucc(AA, MFI, SU, J->getSUnit(),
                         ExitSU, &Depth, Visited);
  }
}

/// Check whether two objects need a chain edge; if so, add it,
/// otherwise remember the rejected SU.
static inline
void addChainDependency(AliasAnalysis *AA, const MachineFrameInfo *MFI,
                        SUnit *SUa, SUnit *SUb,
                        std::set<SUnit *> &RejectList,
                        unsigned TrueMemOrderLatency = 0,
                        bool isNormalMemory = false) {
  // If this is a false dependency,
  // do not add the edge, but remember the rejected node.
  if (!EnableAASchedMI ||
      MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) {
    SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier);
    Dep.setLatency(TrueMemOrderLatency);
    SUb->addPred(Dep);
  }
  else {
    // Duplicate entries should be ignored.
    RejectList.insert(SUb);
    DEBUG(dbgs() << "\tReject chain dep between SU("
          << SUa->NodeNum << ") and SU("
          << SUb->NodeNum << ")\n");
  }
}

/// Create an SUnit for each real instruction, numbered in top-down topological
/// order. The instruction order A < B implies that no edge exists from B to A.
///
/// Map each real instruction to its SUnit.
///
/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
/// instead of pointers.
///
/// MachineScheduler relies on initSUnits numbering the nodes by their order in
/// the original instruction list.
void ScheduleDAGInstrs::initSUnits() {
  // We'll be allocating one SUnit for each real instruction in the region,
  // which is contained within a basic block.
  SUnits.reserve(BB->size());

  for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) {
    MachineInstr *MI = I;
    if (MI->isDebugValue())
      continue;

    SUnit *SU = newSUnit(MI);
    MISUnitMap[MI] = SU;

    SU->isCall = MI->isCall();
    SU->isCommutable = MI->isCommutable();

    // Assign the Latency field of SU using target-provided information.
    SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
  }
}

/// If RegPressure is non-null, compute register pressure as a side effect. The
/// DAG builder is an efficient place to do it because it already visits
/// operands.
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
                                        RegPressureTracker *RPTracker) {
  // Create an SUnit for each real instruction.
  initSUnits();

  // We build scheduling units by walking a block's instruction list from
  // bottom to top.
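  // For example (illustrative): in a region containing
  //   I0: ST %vreg0, @g        ; store to an identified object
  //   I1: %vreg1 = LD @g       ; later load from the same object
  // the walk visits I1 first and records it in the memory-use map, so when I0
  // is visited the store->load chain edge (with TrueMemOrderLatency) can be
  // added immediately, because every later instruction is already known.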
695 696 // Remember where a generic side-effecting instruction is as we procede. 697 SUnit *BarrierChain = 0, *AliasChain = 0; 698 699 // Memory references to specific known memory locations are tracked 700 // so that they can be given more precise dependencies. We track 701 // separately the known memory locations that may alias and those 702 // that are known not to alias 703 MapVector<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs; 704 MapVector<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses; 705 std::set<SUnit*> RejectMemNodes; 706 707 // Remove any stale debug info; sometimes BuildSchedGraph is called again 708 // without emitting the info from the previous call. 709 DbgValues.clear(); 710 FirstDbgValue = NULL; 711 712 assert(Defs.empty() && Uses.empty() && 713 "Only BuildGraph should update Defs/Uses"); 714 Defs.setUniverse(TRI->getNumRegs()); 715 Uses.setUniverse(TRI->getNumRegs()); 716 717 assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); 718 // FIXME: Allow SparseSet to reserve space for the creation of virtual 719 // registers during scheduling. Don't artificially inflate the Universe 720 // because we want to assert that vregs are not created during DAG building. 721 VRegDefs.setUniverse(MRI.getNumVirtRegs()); 722 723 // Model data dependencies between instructions being scheduled and the 724 // ExitSU. 725 addSchedBarrierDeps(); 726 727 // Walk the list of instructions, from bottom moving up. 728 MachineInstr *DbgMI = NULL; 729 for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; 730 MII != MIE; --MII) { 731 MachineInstr *MI = prior(MII); 732 if (MI && DbgMI) { 733 DbgValues.push_back(std::make_pair(DbgMI, MI)); 734 DbgMI = NULL; 735 } 736 737 if (MI->isDebugValue()) { 738 DbgMI = MI; 739 continue; 740 } 741 if (RPTracker) { 742 RPTracker->recede(); 743 assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); 744 } 745 746 assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && 747 "Cannot schedule terminators or labels!"); 748 749 SUnit *SU = MISUnitMap[MI]; 750 assert(SU && "No SUnit mapped to this MI"); 751 752 // Add register-based dependencies (data, anti, and output). 753 bool HasVRegDef = false; 754 for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { 755 const MachineOperand &MO = MI->getOperand(j); 756 if (!MO.isReg()) continue; 757 unsigned Reg = MO.getReg(); 758 if (Reg == 0) continue; 759 760 if (TRI->isPhysicalRegister(Reg)) 761 addPhysRegDeps(SU, j); 762 else { 763 assert(!IsPostRA && "Virtual register encountered!"); 764 if (MO.isDef()) { 765 HasVRegDef = true; 766 addVRegDefDeps(SU, j); 767 } 768 else if (MO.readsReg()) // ignore undef operands 769 addVRegUseDeps(SU, j); 770 } 771 } 772 // If we haven't seen any uses in this scheduling region, create a 773 // dependence edge to ExitSU to model the live-out latency. This is required 774 // for vreg defs with no in-region use, and prefetches with no vreg def. 775 // 776 // FIXME: NumDataSuccs would be more precise than NumSuccs here. This 777 // check currently relies on being called before adding chain deps. 778 if (SU->NumSuccs == 0 && SU->Latency > 1 779 && (HasVRegDef || MI->mayLoad())) { 780 SDep Dep(SU, SDep::Artificial); 781 Dep.setLatency(SU->Latency - 1); 782 ExitSU.addPred(Dep); 783 } 784 785 // Add chain dependencies. 786 // Chain dependencies used to enforce memory order should have 787 // latency of 0 (except for true dependency of Store followed by 788 // aliased Load... 
we estimate that with a single cycle of latency 789 // assuming the hardware will bypass) 790 // Note that isStoreToStackSlot and isLoadFromStackSLot are not usable 791 // after stack slots are lowered to actual addresses. 792 // TODO: Use an AliasAnalysis and do real alias-analysis queries, and 793 // produce more precise dependence information. 794 unsigned TrueMemOrderLatency = MI->mayStore() ? 1 : 0; 795 if (isGlobalMemoryObject(AA, MI)) { 796 // Be conservative with these and add dependencies on all memory 797 // references, even those that are known to not alias. 798 for (MapVector<const Value *, SUnit *>::iterator I = 799 NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { 800 I->second->addPred(SDep(SU, SDep::Barrier)); 801 } 802 for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = 803 NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { 804 for (unsigned i = 0, e = I->second.size(); i != e; ++i) { 805 SDep Dep(SU, SDep::Barrier); 806 Dep.setLatency(TrueMemOrderLatency); 807 I->second[i]->addPred(Dep); 808 } 809 } 810 // Add SU to the barrier chain. 811 if (BarrierChain) 812 BarrierChain->addPred(SDep(SU, SDep::Barrier)); 813 BarrierChain = SU; 814 // This is a barrier event that acts as a pivotal node in the DAG, 815 // so it is safe to clear list of exposed nodes. 816 adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, 817 TrueMemOrderLatency); 818 RejectMemNodes.clear(); 819 NonAliasMemDefs.clear(); 820 NonAliasMemUses.clear(); 821 822 // fall-through 823 new_alias_chain: 824 // Chain all possibly aliasing memory references though SU. 825 if (AliasChain) { 826 unsigned ChainLatency = 0; 827 if (AliasChain->getInstr()->mayLoad()) 828 ChainLatency = TrueMemOrderLatency; 829 addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, 830 ChainLatency); 831 } 832 AliasChain = SU; 833 for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) 834 addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, 835 TrueMemOrderLatency); 836 for (MapVector<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(), 837 E = AliasMemDefs.end(); I != E; ++I) 838 addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); 839 for (MapVector<const Value *, std::vector<SUnit *> >::iterator I = 840 AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { 841 for (unsigned i = 0, e = I->second.size(); i != e; ++i) 842 addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, 843 TrueMemOrderLatency); 844 } 845 adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, 846 TrueMemOrderLatency); 847 PendingLoads.clear(); 848 AliasMemDefs.clear(); 849 AliasMemUses.clear(); 850 } else if (MI->mayStore()) { 851 UnderlyingObjectsVector Objs; 852 getUnderlyingObjectsForInstr(MI, MFI, Objs); 853 854 if (Objs.empty()) { 855 // Treat all other stores conservatively. 856 goto new_alias_chain; 857 } 858 859 bool MayAlias = false; 860 for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); 861 K != KE; ++K) { 862 const Value *V = K->getPointer(); 863 bool ThisMayAlias = K->getInt(); 864 if (ThisMayAlias) 865 MayAlias = true; 866 867 // A store to a specific PseudoSourceValue. Add precise dependencies. 868 // Record the def in MemDefs, first adding a dep if there is 869 // an existing def. 870 MapVector<const Value *, SUnit *>::iterator I = 871 ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); 872 MapVector<const Value *, SUnit *>::iterator IE = 873 ((ThisMayAlias) ? 
AliasMemDefs.end() : NonAliasMemDefs.end()); 874 if (I != IE) { 875 addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); 876 I->second = SU; 877 } else { 878 if (ThisMayAlias) 879 AliasMemDefs[V] = SU; 880 else 881 NonAliasMemDefs[V] = SU; 882 } 883 // Handle the uses in MemUses, if there are any. 884 MapVector<const Value *, std::vector<SUnit *> >::iterator J = 885 ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); 886 MapVector<const Value *, std::vector<SUnit *> >::iterator JE = 887 ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); 888 if (J != JE) { 889 for (unsigned i = 0, e = J->second.size(); i != e; ++i) 890 addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, 891 TrueMemOrderLatency, true); 892 J->second.clear(); 893 } 894 } 895 if (MayAlias) { 896 // Add dependencies from all the PendingLoads, i.e. loads 897 // with no underlying object. 898 for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) 899 addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, 900 TrueMemOrderLatency); 901 // Add dependence on alias chain, if needed. 902 if (AliasChain) 903 addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); 904 // But we also should check dependent instructions for the 905 // SU in question. 906 adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, 907 TrueMemOrderLatency); 908 } 909 // Add dependence on barrier chain, if needed. 910 // There is no point to check aliasing on barrier event. Even if 911 // SU and barrier _could_ be reordered, they should not. In addition, 912 // we have lost all RejectMemNodes below barrier. 913 if (BarrierChain) 914 BarrierChain->addPred(SDep(SU, SDep::Barrier)); 915 916 if (!ExitSU.isPred(SU)) 917 // Push store's up a bit to avoid them getting in between cmp 918 // and branches. 919 ExitSU.addPred(SDep(SU, SDep::Artificial)); 920 } else if (MI->mayLoad()) { 921 bool MayAlias = true; 922 if (MI->isInvariantLoad(AA)) { 923 // Invariant load, no chain dependencies needed! 924 } else { 925 UnderlyingObjectsVector Objs; 926 getUnderlyingObjectsForInstr(MI, MFI, Objs); 927 928 if (Objs.empty()) { 929 // A load with no underlying object. Depend on all 930 // potentially aliasing stores. 931 for (MapVector<const Value *, SUnit *>::iterator I = 932 AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) 933 addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); 934 935 PendingLoads.push_back(SU); 936 MayAlias = true; 937 } else { 938 MayAlias = false; 939 } 940 941 for (UnderlyingObjectsVector::iterator 942 J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { 943 const Value *V = J->getPointer(); 944 bool ThisMayAlias = J->getInt(); 945 946 if (ThisMayAlias) 947 MayAlias = true; 948 949 // A load from a specific PseudoSourceValue. Add precise dependencies. 950 MapVector<const Value *, SUnit *>::iterator I = 951 ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); 952 MapVector<const Value *, SUnit *>::iterator IE = 953 ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); 954 if (I != IE) 955 addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); 956 if (ThisMayAlias) 957 AliasMemUses[V].push_back(SU); 958 else 959 NonAliasMemUses[V].push_back(SU); 960 } 961 if (MayAlias) 962 adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); 963 // Add dependencies on alias and barrier chains, if needed. 
964 if (MayAlias && AliasChain) 965 addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); 966 if (BarrierChain) 967 BarrierChain->addPred(SDep(SU, SDep::Barrier)); 968 } 969 } 970 } 971 if (DbgMI) 972 FirstDbgValue = DbgMI; 973 974 Defs.clear(); 975 Uses.clear(); 976 VRegDefs.clear(); 977 PendingLoads.clear(); 978 } 979 980 void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const { 981 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 982 SU->getInstr()->dump(); 983 #endif 984 } 985 986 std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { 987 std::string s; 988 raw_string_ostream oss(s); 989 if (SU == &EntrySU) 990 oss << "<entry>"; 991 else if (SU == &ExitSU) 992 oss << "<exit>"; 993 else 994 SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); 995 return oss.str(); 996 } 997 998 /// Return the basic block label. It is not necessarilly unique because a block 999 /// contains multiple scheduling regions. But it is fine for visualization. 1000 std::string ScheduleDAGInstrs::getDAGName() const { 1001 return "dag." + BB->getFullName(); 1002 } 1003 1004 //===----------------------------------------------------------------------===// 1005 // SchedDFSResult Implementation 1006 //===----------------------------------------------------------------------===// 1007 1008 namespace llvm { 1009 /// \brief Internal state used to compute SchedDFSResult. 1010 class SchedDFSImpl { 1011 SchedDFSResult &R; 1012 1013 /// Join DAG nodes into equivalence classes by their subtree. 1014 IntEqClasses SubtreeClasses; 1015 /// List PredSU, SuccSU pairs that represent data edges between subtrees. 1016 std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs; 1017 1018 struct RootData { 1019 unsigned NodeID; 1020 unsigned ParentNodeID; // Parent node (member of the parent subtree). 1021 unsigned SubInstrCount; // Instr count in this tree only, not children. 1022 1023 RootData(unsigned id): NodeID(id), 1024 ParentNodeID(SchedDFSResult::InvalidSubtreeID), 1025 SubInstrCount(0) {} 1026 1027 unsigned getSparseSetIndex() const { return NodeID; } 1028 }; 1029 1030 SparseSet<RootData> RootSet; 1031 1032 public: 1033 SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { 1034 RootSet.setUniverse(R.DFSNodeData.size()); 1035 } 1036 1037 /// Return true if this node been visited by the DFS traversal. 1038 /// 1039 /// During visitPostorderNode the Node's SubtreeID is assigned to the Node 1040 /// ID. Later, SubtreeID is updated but remains valid. 1041 bool isVisited(const SUnit *SU) const { 1042 return R.DFSNodeData[SU->NodeNum].SubtreeID 1043 != SchedDFSResult::InvalidSubtreeID; 1044 } 1045 1046 /// Initialize this node's instruction count. We don't need to flag the node 1047 /// visited until visitPostorder because the DAG cannot have cycles. 1048 void visitPreorder(const SUnit *SU) { 1049 R.DFSNodeData[SU->NodeNum].InstrCount = 1050 SU->getInstr()->isTransient() ? 0 : 1; 1051 } 1052 1053 /// Called once for each node after all predecessors are visited. Revisit this 1054 /// node's predecessors and potentially join them now that we know the ILP of 1055 /// the other predecessors. 1056 void visitPostorderNode(const SUnit *SU) { 1057 // Mark this node as the root of a subtree. It may be joined with its 1058 // successors later. 1059 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; 1060 RootData RData(SU->NodeNum); 1061 RData.SubInstrCount = SU->getInstr()->isTransient() ? 
0 : 1; 1062 1063 // If any predecessors are still in their own subtree, they either cannot be 1064 // joined or are large enough to remain separate. If this parent node's 1065 // total instruction count is not greater than a child subtree by at least 1066 // the subtree limit, then try to join it now since splitting subtrees is 1067 // only useful if multiple high-pressure paths are possible. 1068 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; 1069 for (SUnit::const_pred_iterator 1070 PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { 1071 if (PI->getKind() != SDep::Data) 1072 continue; 1073 unsigned PredNum = PI->getSUnit()->NodeNum; 1074 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) 1075 joinPredSubtree(*PI, SU, /*CheckLimit=*/false); 1076 1077 // Either link or merge the TreeData entry from the child to the parent. 1078 if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { 1079 // If the predecessor's parent is invalid, this is a tree edge and the 1080 // current node is the parent. 1081 if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) 1082 RootSet[PredNum].ParentNodeID = SU->NodeNum; 1083 } 1084 else if (RootSet.count(PredNum)) { 1085 // The predecessor is not a root, but is still in the root set. This 1086 // must be the new parent that it was just joined to. Note that 1087 // RootSet[PredNum].ParentNodeID may either be invalid or may still be 1088 // set to the original parent. 1089 RData.SubInstrCount += RootSet[PredNum].SubInstrCount; 1090 RootSet.erase(PredNum); 1091 } 1092 } 1093 RootSet[SU->NodeNum] = RData; 1094 } 1095 1096 /// Called once for each tree edge after calling visitPostOrderNode on the 1097 /// predecessor. Increment the parent node's instruction count and 1098 /// preemptively join this subtree to its parent's if it is small enough. 1099 void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { 1100 R.DFSNodeData[Succ->NodeNum].InstrCount 1101 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; 1102 joinPredSubtree(PredDep, Succ); 1103 } 1104 1105 /// Add a connection for cross edges. 1106 void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { 1107 ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); 1108 } 1109 1110 /// Set each node's subtree ID to the representative ID and record connections 1111 /// between trees. 1112 void finalize() { 1113 SubtreeClasses.compress(); 1114 R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); 1115 assert(SubtreeClasses.getNumClasses() == RootSet.size() 1116 && "number of roots should match trees"); 1117 for (SparseSet<RootData>::const_iterator 1118 RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { 1119 unsigned TreeID = SubtreeClasses[RI->NodeID]; 1120 if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) 1121 R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; 1122 R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; 1123 // Note that SubInstrCount may be greater than InstrCount if we joined 1124 // subtrees across a cross edge. InstrCount will be attributed to the 1125 // original parent, while SubInstrCount will be attributed to the joined 1126 // parent. 
1127 } 1128 R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); 1129 R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); 1130 DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); 1131 for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { 1132 R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; 1133 DEBUG(dbgs() << " SU(" << Idx << ") in tree " 1134 << R.DFSNodeData[Idx].SubtreeID << '\n'); 1135 } 1136 for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator 1137 I = ConnectionPairs.begin(), E = ConnectionPairs.end(); 1138 I != E; ++I) { 1139 unsigned PredTree = SubtreeClasses[I->first->NodeNum]; 1140 unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; 1141 if (PredTree == SuccTree) 1142 continue; 1143 unsigned Depth = I->first->getDepth(); 1144 addConnection(PredTree, SuccTree, Depth); 1145 addConnection(SuccTree, PredTree, Depth); 1146 } 1147 } 1148 1149 protected: 1150 /// Join the predecessor subtree with the successor that is its DFS 1151 /// parent. Apply some heuristics before joining. 1152 bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, 1153 bool CheckLimit = true) { 1154 assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); 1155 1156 // Check if the predecessor is already joined. 1157 const SUnit *PredSU = PredDep.getSUnit(); 1158 unsigned PredNum = PredSU->NodeNum; 1159 if (R.DFSNodeData[PredNum].SubtreeID != PredNum) 1160 return false; 1161 1162 // Four is the magic number of successors before a node is considered a 1163 // pinch point. 1164 unsigned NumDataSucs = 0; 1165 for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), 1166 SE = PredSU->Succs.end(); SI != SE; ++SI) { 1167 if (SI->getKind() == SDep::Data) { 1168 if (++NumDataSucs >= 4) 1169 return false; 1170 } 1171 } 1172 if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) 1173 return false; 1174 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; 1175 SubtreeClasses.join(Succ->NodeNum, PredNum); 1176 return true; 1177 } 1178 1179 /// Called by finalize() to record a connection between trees. 1180 void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { 1181 if (!Depth) 1182 return; 1183 1184 do { 1185 SmallVectorImpl<SchedDFSResult::Connection> &Connections = 1186 R.SubtreeConnections[FromTree]; 1187 for (SmallVectorImpl<SchedDFSResult::Connection>::iterator 1188 I = Connections.begin(), E = Connections.end(); I != E; ++I) { 1189 if (I->TreeID == ToTree) { 1190 I->Level = std::max(I->Level, Depth); 1191 return; 1192 } 1193 } 1194 Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); 1195 FromTree = R.DFSTreeData[FromTree].ParentTreeID; 1196 } while (FromTree != SchedDFSResult::InvalidSubtreeID); 1197 } 1198 }; 1199 } // namespace llvm 1200 1201 namespace { 1202 /// \brief Manage the stack used by a reverse depth-first search over the DAG. 1203 class SchedDAGReverseDFS { 1204 std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack; 1205 public: 1206 bool isComplete() const { return DFSStack.empty(); } 1207 1208 void follow(const SUnit *SU) { 1209 DFSStack.push_back(std::make_pair(SU, SU->Preds.begin())); 1210 } 1211 void advance() { ++DFSStack.back().second; } 1212 1213 const SDep *backtrack() { 1214 DFSStack.pop_back(); 1215 return DFSStack.empty() ? 
0 : llvm::prior(DFSStack.back().second); 1216 } 1217 1218 const SUnit *getCurr() const { return DFSStack.back().first; } 1219 1220 SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; } 1221 1222 SUnit::const_pred_iterator getPredEnd() const { 1223 return getCurr()->Preds.end(); 1224 } 1225 }; 1226 } // anonymous 1227 1228 static bool hasDataSucc(const SUnit *SU) { 1229 for (SUnit::const_succ_iterator 1230 SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) { 1231 if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode()) 1232 return true; 1233 } 1234 return false; 1235 } 1236 1237 /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first 1238 /// search from this root. 1239 void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) { 1240 if (!IsBottomUp) 1241 llvm_unreachable("Top-down ILP metric is unimplemnted"); 1242 1243 SchedDFSImpl Impl(*this); 1244 for (ArrayRef<SUnit>::const_iterator 1245 SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) { 1246 const SUnit *SU = &*SI; 1247 if (Impl.isVisited(SU) || hasDataSucc(SU)) 1248 continue; 1249 1250 SchedDAGReverseDFS DFS; 1251 Impl.visitPreorder(SU); 1252 DFS.follow(SU); 1253 for (;;) { 1254 // Traverse the leftmost path as far as possible. 1255 while (DFS.getPred() != DFS.getPredEnd()) { 1256 const SDep &PredDep = *DFS.getPred(); 1257 DFS.advance(); 1258 // Ignore non-data edges. 1259 if (PredDep.getKind() != SDep::Data 1260 || PredDep.getSUnit()->isBoundaryNode()) { 1261 continue; 1262 } 1263 // An already visited edge is a cross edge, assuming an acyclic DAG. 1264 if (Impl.isVisited(PredDep.getSUnit())) { 1265 Impl.visitCrossEdge(PredDep, DFS.getCurr()); 1266 continue; 1267 } 1268 Impl.visitPreorder(PredDep.getSUnit()); 1269 DFS.follow(PredDep.getSUnit()); 1270 } 1271 // Visit the top of the stack in postorder and backtrack. 1272 const SUnit *Child = DFS.getCurr(); 1273 const SDep *PredDep = DFS.backtrack(); 1274 Impl.visitPostorderNode(Child); 1275 if (PredDep) 1276 Impl.visitPostorderEdge(*PredDep, DFS.getCurr()); 1277 if (DFS.isComplete()) 1278 break; 1279 } 1280 } 1281 Impl.finalize(); 1282 } 1283 1284 /// The root of the given SubtreeID was just scheduled. For all subtrees 1285 /// connected to this tree, record the depth of the connection so that the 1286 /// nearest connected subtrees can be prioritized. 1287 void SchedDFSResult::scheduleTree(unsigned SubtreeID) { 1288 for (SmallVectorImpl<Connection>::const_iterator 1289 I = SubtreeConnections[SubtreeID].begin(), 1290 E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { 1291 SubtreeConnectLevels[I->TreeID] = 1292 std::max(SubtreeConnectLevels[I->TreeID], I->Level); 1293 DEBUG(dbgs() << " Tree: " << I->TreeID 1294 << " @" << SubtreeConnectLevels[I->TreeID] << '\n'); 1295 } 1296 } 1297 1298 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1299 void ILPValue::print(raw_ostream &OS) const { 1300 OS << InstrCount << " / " << Length << " = "; 1301 if (!Length) 1302 OS << "BADILP"; 1303 else 1304 OS << format("%g", ((double)InstrCount / Length)); 1305 } 1306 1307 void ILPValue::dump() const { 1308 dbgs() << *this << '\n'; 1309 } 1310 1311 namespace llvm { 1312 1313 raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { 1314 Val.print(OS); 1315 return OS; 1316 } 1317 1318 } // namespace llvm 1319 #endif // !NDEBUG || LLVM_ENABLE_DUMP 1320