1 //===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This implements a simple VLIW packetizer using DFA. The packetizer works on 11 // machine basic blocks. For each instruction I in BB, the packetizer consults 12 // the DFA to see if machine resources are available to execute I. If so, the 13 // packetizer checks if I depends on any instruction J in the current packet. 14 // If no dependency is found, I is added to current packet and machine resource 15 // is marked as taken. If any dependency is found, a target API call is made to 16 // prune the dependence. 17 // 18 //===----------------------------------------------------------------------===// 19 #include "HexagonRegisterInfo.h" 20 #include "HexagonSubtarget.h" 21 #include "HexagonTargetMachine.h" 22 #include "HexagonVLIWPacketizer.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/CodeGen/MachineDominators.h" 25 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineLoopInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Debug.h" 32 33 using namespace llvm; 34 35 #define DEBUG_TYPE "packets" 36 37 static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden, 38 cl::ZeroOrMore, cl::init(false), 39 cl::desc("Disable Hexagon packetizer pass")); 40 41 static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", 42 cl::ZeroOrMore, cl::Hidden, cl::init(true), 43 cl::desc("Allow non-solo packetization of volatile memory references")); 44 45 static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false), 46 cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); 47 48 static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores", 49 cl::init(false), cl::Hidden, cl::ZeroOrMore, 50 cl::desc("Disable vector double new-value-stores")); 51 52 extern cl::opt<bool> ScheduleInlineAsm; 53 54 namespace llvm { 55 FunctionPass *createHexagonPacketizer(); 56 void initializeHexagonPacketizerPass(PassRegistry&); 57 } 58 59 60 namespace { 61 class HexagonPacketizer : public MachineFunctionPass { 62 public: 63 static char ID; 64 HexagonPacketizer() : MachineFunctionPass(ID) { 65 initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); 66 } 67 68 void getAnalysisUsage(AnalysisUsage &AU) const override { 69 AU.setPreservesCFG(); 70 AU.addRequired<AAResultsWrapperPass>(); 71 AU.addRequired<MachineBranchProbabilityInfo>(); 72 AU.addRequired<MachineDominatorTree>(); 73 AU.addRequired<MachineLoopInfo>(); 74 AU.addPreserved<MachineDominatorTree>(); 75 AU.addPreserved<MachineLoopInfo>(); 76 MachineFunctionPass::getAnalysisUsage(AU); 77 } 78 const char *getPassName() const override { 79 return "Hexagon Packetizer"; 80 } 81 bool runOnMachineFunction(MachineFunction &Fn) override; 82 MachineFunctionProperties getRequiredProperties() const override { 83 return MachineFunctionProperties().set( 84 MachineFunctionProperties::Property::AllVRegsAllocated); 85 } 86 87 private: 88 const HexagonInstrInfo *HII; 89 const HexagonRegisterInfo *HRI; 90 }; 91 92 char HexagonPacketizer::ID = 0; 93 } 94 95 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", 96 false, false) 97 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 98 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) 99 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 100 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 101 INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", 102 false, false) 103 104 105 HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, 106 MachineLoopInfo &MLI, AliasAnalysis *AA, 107 const MachineBranchProbabilityInfo *MBPI) 108 : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { 109 HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 110 HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 111 } 112 113 // Check if FirstI modifies a register that SecondI reads. 114 static bool hasWriteToReadDep(const MachineInstr &FirstI, 115 const MachineInstr &SecondI, 116 const TargetRegisterInfo *TRI) { 117 for (auto &MO : FirstI.operands()) { 118 if (!MO.isReg() || !MO.isDef()) 119 continue; 120 unsigned R = MO.getReg(); 121 if (SecondI.readsRegister(R, TRI)) 122 return true; 123 } 124 return false; 125 } 126 127 128 static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, 129 MachineBasicBlock::iterator BundleIt, bool Before) { 130 MachineBasicBlock::instr_iterator InsertPt; 131 if (Before) 132 InsertPt = BundleIt.getInstrIterator(); 133 else 134 InsertPt = std::next(BundleIt).getInstrIterator(); 135 136 MachineBasicBlock &B = *MI->getParent(); 137 // The instruction should at least be bundled with the preceding instruction 138 // (there will always be one, i.e. BUNDLE, if nothing else). 139 assert(MI->isBundledWithPred()); 140 if (MI->isBundledWithSucc()) { 141 MI->clearFlag(MachineInstr::BundledSucc); 142 MI->clearFlag(MachineInstr::BundledPred); 143 } else { 144 // If it's not bundled with the successor (i.e. it is the last one 145 // in the bundle), then we can simply unbundle it from the predecessor, 146 // which will take care of updating the predecessor's flag. 147 MI->unbundleFromPred(); 148 } 149 B.splice(InsertPt, &B, MI); 150 151 // Get the size of the bundle without asserting. 152 MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator(); 153 MachineBasicBlock::const_instr_iterator E = B.instr_end(); 154 unsigned Size = 0; 155 for (++I; I != E && I->isBundledWithPred(); ++I) 156 ++Size; 157 158 // If there are still two or more instructions, then there is nothing 159 // else to be done. 160 if (Size > 1) 161 return BundleIt; 162 163 // Otherwise, extract the single instruction out and delete the bundle. 164 MachineBasicBlock::iterator NextIt = std::next(BundleIt); 165 MachineInstr *SingleI = BundleIt->getNextNode(); 166 SingleI->unbundleFromPred(); 167 assert(!SingleI->isBundledWithSucc()); 168 BundleIt->eraseFromParent(); 169 return NextIt; 170 } 171 172 173 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { 174 if (DisablePacketizer || skipFunction(*MF.getFunction())) 175 return false; 176 177 HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 178 HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 179 auto &MLI = getAnalysis<MachineLoopInfo>(); 180 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 181 auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); 182 183 if (EnableGenAllInsnClass) 184 HII->genAllInsnTimingClasses(MF); 185 186 // Instantiate the packetizer. 187 HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); 188 189 // DFA state table should not be empty. 190 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 191 192 // 193 // Loop over all basic blocks and remove KILL pseudo-instructions 194 // These instructions confuse the dependence analysis. Consider: 195 // D0 = ... (Insn 0) 196 // R0 = KILL R0, D0 (Insn 1) 197 // R0 = ... (Insn 2) 198 // Here, Insn 1 will result in the dependence graph not emitting an output 199 // dependence between Insn 0 and Insn 2. This can lead to incorrect 200 // packetization 201 // 202 for (auto &MB : MF) { 203 auto End = MB.end(); 204 auto MI = MB.begin(); 205 while (MI != End) { 206 auto NextI = std::next(MI); 207 if (MI->isKill()) { 208 MB.erase(MI); 209 End = MB.end(); 210 } 211 MI = NextI; 212 } 213 } 214 215 // Loop over all of the basic blocks. 216 for (auto &MB : MF) { 217 auto Begin = MB.begin(), End = MB.end(); 218 while (Begin != End) { 219 // First the first non-boundary starting from the end of the last 220 // scheduling region. 221 MachineBasicBlock::iterator RB = Begin; 222 while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF)) 223 ++RB; 224 // First the first boundary starting from the beginning of the new 225 // region. 226 MachineBasicBlock::iterator RE = RB; 227 while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF)) 228 ++RE; 229 // Add the scheduling boundary if it's not block end. 230 if (RE != End) 231 ++RE; 232 // If RB == End, then RE == End. 233 if (RB != End) 234 Packetizer.PacketizeMIs(&MB, RB, RE); 235 236 Begin = RE; 237 } 238 } 239 240 Packetizer.unpacketizeSoloInstrs(MF); 241 return true; 242 } 243 244 245 // Reserve resources for a constant extender. Trigger an assertion if the 246 // reservation fails. 247 void HexagonPacketizerList::reserveResourcesForConstExt() { 248 if (!tryAllocateResourcesForConstExt(true)) 249 llvm_unreachable("Resources not available"); 250 } 251 252 bool HexagonPacketizerList::canReserveResourcesForConstExt() { 253 return tryAllocateResourcesForConstExt(false); 254 } 255 256 // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, 257 // return true, otherwise, return false. 258 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { 259 auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); 260 bool Avail = ResourceTracker->canReserveResources(*ExtMI); 261 if (Reserve && Avail) 262 ResourceTracker->reserveResources(*ExtMI); 263 MF.DeleteMachineInstr(ExtMI); 264 return Avail; 265 } 266 267 268 bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, 269 SDep::Kind DepType, unsigned DepReg) { 270 // Check for LR dependence. 271 if (DepReg == HRI->getRARegister()) 272 return true; 273 274 if (HII->isDeallocRet(MI)) 275 if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) 276 return true; 277 278 // Check if this is a predicate dependence. 279 const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); 280 if (RC == &Hexagon::PredRegsRegClass) 281 return true; 282 283 // Assumes that the first operand of the CALLr is the function address. 284 if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { 285 MachineOperand MO = MI->getOperand(0); 286 if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) 287 return true; 288 } 289 290 return false; 291 } 292 293 static bool isRegDependence(const SDep::Kind DepType) { 294 return DepType == SDep::Data || DepType == SDep::Anti || 295 DepType == SDep::Output; 296 } 297 298 static bool isDirectJump(const MachineInstr* MI) { 299 return MI->getOpcode() == Hexagon::J2_jump; 300 } 301 302 static bool isSchedBarrier(const MachineInstr* MI) { 303 switch (MI->getOpcode()) { 304 case Hexagon::Y2_barrier: 305 return true; 306 } 307 return false; 308 } 309 310 static bool isControlFlow(const MachineInstr* MI) { 311 return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); 312 } 313 314 315 /// Returns true if the instruction modifies a callee-saved register. 316 static bool doesModifyCalleeSavedReg(const MachineInstr *MI, 317 const TargetRegisterInfo *TRI) { 318 const MachineFunction &MF = *MI->getParent()->getParent(); 319 for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) 320 if (MI->modifiesRegister(*CSR, TRI)) 321 return true; 322 return false; 323 } 324 325 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) 326 // Returns true if an instruction can be promoted to .new predicate or 327 // new-value store. 328 bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { 329 return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); 330 } 331 332 // Promote an instructiont to its .cur form. 333 // At this time, we have already made a call to canPromoteToDotCur and made 334 // sure that it can *indeed* be promoted. 335 bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI, 336 SDep::Kind DepType, MachineBasicBlock::iterator &MII, 337 const TargetRegisterClass* RC) { 338 assert(DepType == SDep::Data); 339 int CurOpcode = HII->getDotCurOp(MI); 340 MI->setDesc(HII->get(CurOpcode)); 341 return true; 342 } 343 344 void HexagonPacketizerList::cleanUpDotCur() { 345 MachineInstr *MI = NULL; 346 for (auto BI : CurrentPacketMIs) { 347 DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); 348 if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { 349 MI = BI; 350 continue; 351 } 352 if (MI) { 353 for (auto &MO : BI->operands()) 354 if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg()) 355 return; 356 } 357 } 358 if (!MI) 359 return; 360 // We did not find a use of the CUR, so de-cur it. 361 MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); 362 DEBUG(dbgs() << "Demoted CUR "; MI->dump();); 363 } 364 365 // Check to see if an instruction can be dot cur. 366 bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, 367 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, 368 const TargetRegisterClass *RC) { 369 if (!HII->isV60VectorInstruction(MI)) 370 return false; 371 if (!HII->isV60VectorInstruction(&*MII)) 372 return false; 373 374 // Already a dot new instruction. 375 if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI)) 376 return false; 377 378 if (!HII->mayBeCurLoad(MI)) 379 return false; 380 381 // The "cur value" cannot come from inline asm. 382 if (PacketSU->getInstr()->isInlineAsm()) 383 return false; 384 385 // Make sure candidate instruction uses cur. 386 DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; 387 MI->dump(); 388 dbgs() << "in packet\n";); 389 MachineInstr &MJ = *MII; 390 DEBUG({ 391 dbgs() << "Checking CUR against "; 392 MJ.dump(); 393 }); 394 unsigned DestReg = MI->getOperand(0).getReg(); 395 bool FoundMatch = false; 396 for (auto &MO : MJ.operands()) 397 if (MO.isReg() && MO.getReg() == DestReg) 398 FoundMatch = true; 399 if (!FoundMatch) 400 return false; 401 402 // Check for existing uses of a vector register within the packet which 403 // would be affected by converting a vector load into .cur formt. 404 for (auto BI : CurrentPacketMIs) { 405 DEBUG(dbgs() << "packet has "; BI->dump();); 406 if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo())) 407 return false; 408 } 409 410 DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump();); 411 // We can convert the opcode into a .cur. 412 return true; 413 } 414 415 // Promote an instruction to its .new form. At this time, we have already 416 // made a call to canPromoteToDotNew and made sure that it can *indeed* be 417 // promoted. 418 bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, 419 SDep::Kind DepType, MachineBasicBlock::iterator &MII, 420 const TargetRegisterClass* RC) { 421 assert (DepType == SDep::Data); 422 int NewOpcode; 423 if (RC == &Hexagon::PredRegsRegClass) 424 NewOpcode = HII->getDotNewPredOp(MI, MBPI); 425 else 426 NewOpcode = HII->getDotNewOp(MI); 427 MI->setDesc(HII->get(NewOpcode)); 428 return true; 429 } 430 431 bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { 432 int NewOpcode = HII->getDotOldOp(MI->getOpcode()); 433 MI->setDesc(HII->get(NewOpcode)); 434 return true; 435 } 436 437 enum PredicateKind { 438 PK_False, 439 PK_True, 440 PK_Unknown 441 }; 442 443 /// Returns true if an instruction is predicated on p0 and false if it's 444 /// predicated on !p0. 445 static PredicateKind getPredicateSense(const MachineInstr &MI, 446 const HexagonInstrInfo *HII) { 447 if (!HII->isPredicated(MI)) 448 return PK_Unknown; 449 if (HII->isPredicatedTrue(MI)) 450 return PK_True; 451 return PK_False; 452 } 453 454 static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, 455 const HexagonInstrInfo *HII) { 456 assert(HII->isPostIncrement(MI) && "Not a post increment operation."); 457 #ifndef NDEBUG 458 // Post Increment means duplicates. Use dense map to find duplicates in the 459 // list. Caution: Densemap initializes with the minimum of 64 buckets, 460 // whereas there are at most 5 operands in the post increment. 461 DenseSet<unsigned> DefRegsSet; 462 for (auto &MO : MI->operands()) 463 if (MO.isReg() && MO.isDef()) 464 DefRegsSet.insert(MO.getReg()); 465 466 for (auto &MO : MI->operands()) 467 if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) 468 return MO; 469 #else 470 if (MI->mayLoad()) { 471 const MachineOperand &Op1 = MI->getOperand(1); 472 // The 2nd operand is always the post increment operand in load. 473 assert(Op1.isReg() && "Post increment operand has be to a register."); 474 return Op1; 475 } 476 if (MI->getDesc().mayStore()) { 477 const MachineOperand &Op0 = MI->getOperand(0); 478 // The 1st operand is always the post increment operand in store. 479 assert(Op0.isReg() && "Post increment operand has be to a register."); 480 return Op0; 481 } 482 #endif 483 // we should never come here. 484 llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); 485 } 486 487 // Get the value being stored. 488 static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { 489 // value being stored is always the last operand. 490 return MI->getOperand(MI->getNumOperands()-1); 491 } 492 493 static bool isLoadAbsSet(const MachineInstr *MI) { 494 unsigned Opc = MI->getOpcode(); 495 switch (Opc) { 496 case Hexagon::L4_loadrd_ap: 497 case Hexagon::L4_loadrb_ap: 498 case Hexagon::L4_loadrh_ap: 499 case Hexagon::L4_loadrub_ap: 500 case Hexagon::L4_loadruh_ap: 501 case Hexagon::L4_loadri_ap: 502 return true; 503 } 504 return false; 505 } 506 507 static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { 508 assert(isLoadAbsSet(MI)); 509 return MI->getOperand(1); 510 } 511 512 513 // Can be new value store? 514 // Following restrictions are to be respected in convert a store into 515 // a new value store. 516 // 1. If an instruction uses auto-increment, its address register cannot 517 // be a new-value register. Arch Spec 5.4.2.1 518 // 2. If an instruction uses absolute-set addressing mode, its address 519 // register cannot be a new-value register. Arch Spec 5.4.2.1. 520 // 3. If an instruction produces a 64-bit result, its registers cannot be used 521 // as new-value registers. Arch Spec 5.4.2.2. 522 // 4. If the instruction that sets the new-value register is conditional, then 523 // the instruction that uses the new-value register must also be conditional, 524 // and both must always have their predicates evaluate identically. 525 // Arch Spec 5.4.2.3. 526 // 5. There is an implied restriction that a packet cannot have another store, 527 // if there is a new value store in the packet. Corollary: if there is 528 // already a store in a packet, there can not be a new value store. 529 // Arch Spec: 3.4.4.2 530 bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, 531 const MachineInstr *PacketMI, unsigned DepReg) { 532 // Make sure we are looking at the store, that can be promoted. 533 if (!HII->mayBeNewStore(MI)) 534 return false; 535 536 // Make sure there is dependency and can be new value'd. 537 const MachineOperand &Val = getStoreValueOperand(MI); 538 if (Val.isReg() && Val.getReg() != DepReg) 539 return false; 540 541 const MCInstrDesc& MCID = PacketMI->getDesc(); 542 543 // First operand is always the result. 544 const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); 545 // Double regs can not feed into new value store: PRM section: 5.4.2.2. 546 if (PacketRC == &Hexagon::DoubleRegsRegClass) 547 return false; 548 549 // New-value stores are of class NV (slot 0), dual stores require class ST 550 // in slot 0 (PRM 5.5). 551 for (auto I : CurrentPacketMIs) { 552 SUnit *PacketSU = MIToSUnit.find(I)->second; 553 if (PacketSU->getInstr()->mayStore()) 554 return false; 555 } 556 557 // Make sure it's NOT the post increment register that we are going to 558 // new value. 559 if (HII->isPostIncrement(MI) && 560 getPostIncrementOperand(MI, HII).getReg() == DepReg) { 561 return false; 562 } 563 564 if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && 565 getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { 566 // If source is post_inc, or absolute-set addressing, it can not feed 567 // into new value store 568 // r3 = memw(r2++#4) 569 // memw(r30 + #-1404) = r2.new -> can not be new value store 570 // arch spec section: 5.4.2.1. 571 return false; 572 } 573 574 if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) 575 return false; 576 577 // If the source that feeds the store is predicated, new value store must 578 // also be predicated. 579 if (HII->isPredicated(*PacketMI)) { 580 if (!HII->isPredicated(*MI)) 581 return false; 582 583 // Check to make sure that they both will have their predicates 584 // evaluate identically. 585 unsigned predRegNumSrc = 0; 586 unsigned predRegNumDst = 0; 587 const TargetRegisterClass* predRegClass = nullptr; 588 589 // Get predicate register used in the source instruction. 590 for (auto &MO : PacketMI->operands()) { 591 if (!MO.isReg()) 592 continue; 593 predRegNumSrc = MO.getReg(); 594 predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); 595 if (predRegClass == &Hexagon::PredRegsRegClass) 596 break; 597 } 598 assert((predRegClass == &Hexagon::PredRegsRegClass) && 599 "predicate register not found in a predicated PacketMI instruction"); 600 601 // Get predicate register used in new-value store instruction. 602 for (auto &MO : MI->operands()) { 603 if (!MO.isReg()) 604 continue; 605 predRegNumDst = MO.getReg(); 606 predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); 607 if (predRegClass == &Hexagon::PredRegsRegClass) 608 break; 609 } 610 assert((predRegClass == &Hexagon::PredRegsRegClass) && 611 "predicate register not found in a predicated MI instruction"); 612 613 // New-value register producer and user (store) need to satisfy these 614 // constraints: 615 // 1) Both instructions should be predicated on the same register. 616 // 2) If producer of the new-value register is .new predicated then store 617 // should also be .new predicated and if producer is not .new predicated 618 // then store should not be .new predicated. 619 // 3) Both new-value register producer and user should have same predicate 620 // sense, i.e, either both should be negated or both should be non-negated. 621 if (predRegNumDst != predRegNumSrc || 622 HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || 623 getPredicateSense(*MI, HII) != getPredicateSense(*PacketMI, HII)) 624 return false; 625 } 626 627 // Make sure that other than the new-value register no other store instruction 628 // register has been modified in the same packet. Predicate registers can be 629 // modified by they should not be modified between the producer and the store 630 // instruction as it will make them both conditional on different values. 631 // We already know this to be true for all the instructions before and 632 // including PacketMI. Howerver, we need to perform the check for the 633 // remaining instructions in the packet. 634 635 unsigned StartCheck = 0; 636 637 for (auto I : CurrentPacketMIs) { 638 SUnit *TempSU = MIToSUnit.find(I)->second; 639 MachineInstr* TempMI = TempSU->getInstr(); 640 641 // Following condition is true for all the instructions until PacketMI is 642 // reached (StartCheck is set to 0 before the for loop). 643 // StartCheck flag is 1 for all the instructions after PacketMI. 644 if (TempMI != PacketMI && !StartCheck) // Start processing only after 645 continue; // encountering PacketMI. 646 647 StartCheck = 1; 648 if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. 649 continue; 650 651 for (auto &MO : MI->operands()) 652 if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI)) 653 return false; 654 } 655 656 // Make sure that for non-POST_INC stores: 657 // 1. The only use of reg is DepReg and no other registers. 658 // This handles V4 base+index registers. 659 // The following store can not be dot new. 660 // Eg. r0 = add(r0, #3) 661 // memw(r1+r0<<#2) = r0 662 if (!HII->isPostIncrement(MI)) { 663 for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { 664 const MachineOperand &MO = MI->getOperand(opNum); 665 if (MO.isReg() && MO.getReg() == DepReg) 666 return false; 667 } 668 } 669 670 // If data definition is because of implicit definition of the register, 671 // do not newify the store. Eg. 672 // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def> 673 // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343] 674 for (auto &MO : PacketMI->operands()) { 675 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) 676 continue; 677 unsigned R = MO.getReg(); 678 if (R == DepReg || HRI->isSuperRegister(DepReg, R)) 679 return false; 680 } 681 682 // Handle imp-use of super reg case. There is a target independent side 683 // change that should prevent this situation but I am handling it for 684 // just-in-case. For example, we cannot newify R2 in the following case: 685 // %R3<def> = A2_tfrsi 0; 686 // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>; 687 for (auto &MO : MI->operands()) { 688 if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) 689 return false; 690 } 691 692 // Can be dot new store. 693 return true; 694 } 695 696 // Can this MI to promoted to either new value store or new value jump. 697 bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, 698 const SUnit *PacketSU, unsigned DepReg, 699 MachineBasicBlock::iterator &MII) { 700 if (!HII->mayBeNewStore(MI)) 701 return false; 702 703 // Check to see the store can be new value'ed. 704 MachineInstr *PacketMI = PacketSU->getInstr(); 705 if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) 706 return true; 707 708 // Check to see the compare/jump can be new value'ed. 709 // This is done as a pass on its own. Don't need to check it here. 710 return false; 711 } 712 713 static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { 714 for (auto &MO : I->operands()) 715 if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) 716 return true; 717 return false; 718 } 719 720 // Check to see if an instruction can be dot new 721 // There are three kinds. 722 // 1. dot new on predicate - V2/V3/V4 723 // 2. dot new on stores NV/ST - V4 724 // 3. dot new on jump NV/J - V4 -- This is generated in a pass. 725 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, 726 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, 727 const TargetRegisterClass* RC) { 728 // Already a dot new instruction. 729 if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) 730 return false; 731 732 if (!isNewifiable(MI)) 733 return false; 734 735 const MachineInstr *PI = PacketSU->getInstr(); 736 737 // The "new value" cannot come from inline asm. 738 if (PI->isInlineAsm()) 739 return false; 740 741 // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no 742 // sense. 743 if (PI->isImplicitDef()) 744 return false; 745 746 // If dependency is trough an implicitly defined register, we should not 747 // newify the use. 748 if (isImplicitDependency(PI, DepReg)) 749 return false; 750 751 const MCInstrDesc& MCID = PI->getDesc(); 752 const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); 753 if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass) 754 return false; 755 756 // predicate .new 757 // bug 5670: until that is fixed 758 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) 759 if (RC == &Hexagon::PredRegsRegClass) 760 if (HII->isCondInst(MI) || MI->isReturn()) 761 return HII->predCanBeUsedAsDotNew(PI, DepReg); 762 763 if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) 764 return false; 765 766 // Create a dot new machine instruction to see if resources can be 767 // allocated. If not, bail out now. 768 int NewOpcode = HII->getDotNewOp(MI); 769 const MCInstrDesc &D = HII->get(NewOpcode); 770 MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); 771 bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI); 772 MF.DeleteMachineInstr(NewMI); 773 if (!ResourcesAvailable) 774 return false; 775 776 // New Value Store only. New Value Jump generated as a separate pass. 777 if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) 778 return false; 779 780 return true; 781 } 782 783 // Go through the packet instructions and search for an anti dependency between 784 // them and DepReg from MI. Consider this case: 785 // Trying to add 786 // a) %R1<def> = TFRI_cdNotPt %P3, 2 787 // to this packet: 788 // { 789 // b) %P0<def> = C2_or %P3<kill>, %P0<kill> 790 // c) %P3<def> = C2_tfrrp %R23 791 // d) %R1<def> = C2_cmovenewit %P3, 4 792 // } 793 // The P3 from a) and d) will be complements after 794 // a)'s P3 is converted to .new form 795 // Anti-dep between c) and b) is irrelevant for this case 796 bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, 797 unsigned DepReg) { 798 SUnit *PacketSUDep = MIToSUnit.find(MI)->second; 799 800 for (auto I : CurrentPacketMIs) { 801 // We only care for dependencies to predicated instructions 802 if (!HII->isPredicated(*I)) 803 continue; 804 805 // Scheduling Unit for current insn in the packet 806 SUnit *PacketSU = MIToSUnit.find(I)->second; 807 808 // Look at dependencies between current members of the packet and 809 // predicate defining instruction MI. Make sure that dependency is 810 // on the exact register we care about. 811 if (PacketSU->isSucc(PacketSUDep)) { 812 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { 813 auto &Dep = PacketSU->Succs[i]; 814 if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && 815 Dep.getReg() == DepReg) 816 return true; 817 } 818 } 819 } 820 821 return false; 822 } 823 824 825 /// Gets the predicate register of a predicated instruction. 826 static unsigned getPredicatedRegister(MachineInstr &MI, 827 const HexagonInstrInfo *QII) { 828 /// We use the following rule: The first predicate register that is a use is 829 /// the predicate register of a predicated instruction. 830 assert(QII->isPredicated(MI) && "Must be predicated instruction"); 831 832 for (auto &Op : MI.operands()) { 833 if (Op.isReg() && Op.getReg() && Op.isUse() && 834 Hexagon::PredRegsRegClass.contains(Op.getReg())) 835 return Op.getReg(); 836 } 837 838 llvm_unreachable("Unknown instruction operand layout"); 839 return 0; 840 } 841 842 // Given two predicated instructions, this function detects whether 843 // the predicates are complements. 844 bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1, 845 MachineInstr &MI2) { 846 // If we don't know the predicate sense of the instructions bail out early, we 847 // need it later. 848 if (getPredicateSense(MI1, HII) == PK_Unknown || 849 getPredicateSense(MI2, HII) == PK_Unknown) 850 return false; 851 852 // Scheduling unit for candidate. 853 SUnit *SU = MIToSUnit[&MI1]; 854 855 // One corner case deals with the following scenario: 856 // Trying to add 857 // a) %R24<def> = A2_tfrt %P0, %R25 858 // to this packet: 859 // { 860 // b) %R25<def> = A2_tfrf %P0, %R24 861 // c) %P0<def> = C2_cmpeqi %R26, 1 862 // } 863 // 864 // On general check a) and b) are complements, but presence of c) will 865 // convert a) to .new form, and then it is not a complement. 866 // We attempt to detect it by analyzing existing dependencies in the packet. 867 868 // Analyze relationships between all existing members of the packet. 869 // Look for Anti dependecy on the same predicate reg as used in the 870 // candidate. 871 for (auto I : CurrentPacketMIs) { 872 // Scheduling Unit for current insn in the packet. 873 SUnit *PacketSU = MIToSUnit.find(I)->second; 874 875 // If this instruction in the packet is succeeded by the candidate... 876 if (PacketSU->isSucc(SU)) { 877 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { 878 auto Dep = PacketSU->Succs[i]; 879 // The corner case exist when there is true data dependency between 880 // candidate and one of current packet members, this dep is on 881 // predicate reg, and there already exist anti dep on the same pred in 882 // the packet. 883 if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && 884 Hexagon::PredRegsRegClass.contains(Dep.getReg())) { 885 // Here I know that I is predicate setting instruction with true 886 // data dep to candidate on the register we care about - c) in the 887 // above example. Now I need to see if there is an anti dependency 888 // from c) to any other instruction in the same packet on the pred 889 // reg of interest. 890 if (restrictingDepExistInPacket(I, Dep.getReg())) 891 return false; 892 } 893 } 894 } 895 } 896 897 // If the above case does not apply, check regular complement condition. 898 // Check that the predicate register is the same and that the predicate 899 // sense is different We also need to differentiate .old vs. .new: !p0 900 // is not complementary to p0.new. 901 unsigned PReg1 = getPredicatedRegister(MI1, HII); 902 unsigned PReg2 = getPredicatedRegister(MI2, HII); 903 return PReg1 == PReg2 && 904 Hexagon::PredRegsRegClass.contains(PReg1) && 905 Hexagon::PredRegsRegClass.contains(PReg2) && 906 getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && 907 HII->isDotNewInst(&MI1) == HII->isDotNewInst(&MI2); 908 } 909 910 // Initialize packetizer flags. 911 void HexagonPacketizerList::initPacketizerState() { 912 Dependence = false; 913 PromotedToDotNew = false; 914 GlueToNewValueJump = false; 915 GlueAllocframeStore = false; 916 FoundSequentialDependence = false; 917 } 918 919 // Ignore bundling of pseudo instructions. 920 bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI, 921 const MachineBasicBlock *) { 922 if (MI.isDebugValue()) 923 return true; 924 925 if (MI.isCFIInstruction()) 926 return false; 927 928 // We must print out inline assembly. 929 if (MI.isInlineAsm()) 930 return false; 931 932 if (MI.isImplicitDef()) 933 return false; 934 935 // We check if MI has any functional units mapped to it. If it doesn't, 936 // we ignore the instruction. 937 const MCInstrDesc& TID = MI.getDesc(); 938 auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); 939 unsigned FuncUnits = IS->getUnits(); 940 return !FuncUnits; 941 } 942 943 bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) { 944 if (MI.isEHLabel() || MI.isCFIInstruction()) 945 return true; 946 947 // Consider inline asm to not be a solo instruction by default. 948 // Inline asm will be put in a packet temporarily, but then it will be 949 // removed, and placed outside of the packet (before or after, depending 950 // on dependencies). This is to reduce the impact of inline asm as a 951 // "packet splitting" instruction. 952 if (MI.isInlineAsm() && !ScheduleInlineAsm) 953 return true; 954 955 // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: 956 // trap, pause, barrier, icinva, isync, and syncht are solo instructions. 957 // They must not be grouped with other instructions in a packet. 958 if (isSchedBarrier(&MI)) 959 return true; 960 961 if (HII->isSolo(&MI)) 962 return true; 963 964 if (MI.getOpcode() == Hexagon::A2_nop) 965 return true; 966 967 return false; 968 } 969 970 971 // Quick check if instructions MI and MJ cannot coexist in the same packet. 972 // Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", 973 // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". 974 // For full test call this function twice: 975 // cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) 976 // Doing the test only one way saves the amount of code in this function, 977 // since every test would need to be repeated with the MI and MJ reversed. 978 static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, 979 const HexagonInstrInfo &HII) { 980 const MachineFunction *MF = MI->getParent()->getParent(); 981 if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && 982 HII.isHVXMemWithAIndirect(MI, MJ)) 983 return true; 984 985 // An inline asm cannot be together with a branch, because we may not be 986 // able to remove the asm out after packetizing (i.e. if the asm must be 987 // moved past the bundle). Similarly, two asms cannot be together to avoid 988 // complications when determining their relative order outside of a bundle. 989 if (MI->isInlineAsm()) 990 return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || 991 MJ->isCall() || MJ->isTerminator(); 992 993 // "False" really means that the quick check failed to determine if 994 // I and J cannot coexist. 995 return false; 996 } 997 998 999 // Full, symmetric check. 1000 bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, 1001 const MachineInstr *MJ) { 1002 return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); 1003 } 1004 1005 void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { 1006 for (auto &B : MF) { 1007 MachineBasicBlock::iterator BundleIt; 1008 MachineBasicBlock::instr_iterator NextI; 1009 for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { 1010 NextI = std::next(I); 1011 MachineInstr *MI = &*I; 1012 if (MI->isBundle()) 1013 BundleIt = I; 1014 if (!MI->isInsideBundle()) 1015 continue; 1016 1017 // Decide on where to insert the instruction that we are pulling out. 1018 // Debug instructions always go before the bundle, but the placement of 1019 // INLINE_ASM depends on potential dependencies. By default, try to 1020 // put it before the bundle, but if the asm writes to a register that 1021 // other instructions in the bundle read, then we need to place it 1022 // after the bundle (to preserve the bundle semantics). 1023 bool InsertBeforeBundle; 1024 if (MI->isInlineAsm()) 1025 InsertBeforeBundle = !hasWriteToReadDep(*MI, *BundleIt, HRI); 1026 else if (MI->isDebugValue()) 1027 InsertBeforeBundle = true; 1028 else 1029 continue; 1030 1031 BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); 1032 } 1033 } 1034 } 1035 1036 // Check if a given instruction is of class "system". 1037 static bool isSystemInstr(const MachineInstr *MI) { 1038 unsigned Opc = MI->getOpcode(); 1039 switch (Opc) { 1040 case Hexagon::Y2_barrier: 1041 case Hexagon::Y2_dcfetchbo: 1042 return true; 1043 } 1044 return false; 1045 } 1046 1047 bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, 1048 const MachineInstr *J) { 1049 // The dependence graph may not include edges between dead definitions, 1050 // so without extra checks, we could end up packetizing two instruction 1051 // defining the same (dead) register. 1052 if (I->isCall() || J->isCall()) 1053 return false; 1054 if (HII->isPredicated(*I) || HII->isPredicated(*J)) 1055 return false; 1056 1057 BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); 1058 for (auto &MO : I->operands()) { 1059 if (!MO.isReg() || !MO.isDef() || !MO.isDead()) 1060 continue; 1061 DeadDefs[MO.getReg()] = true; 1062 } 1063 1064 for (auto &MO : J->operands()) { 1065 if (!MO.isReg() || !MO.isDef() || !MO.isDead()) 1066 continue; 1067 unsigned R = MO.getReg(); 1068 if (R != Hexagon::USR_OVF && DeadDefs[R]) 1069 return true; 1070 } 1071 return false; 1072 } 1073 1074 bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, 1075 const MachineInstr *J) { 1076 // A save callee-save register function call can only be in a packet 1077 // with instructions that don't write to the callee-save registers. 1078 if ((HII->isSaveCalleeSavedRegsCall(I) && 1079 doesModifyCalleeSavedReg(J, HRI)) || 1080 (HII->isSaveCalleeSavedRegsCall(J) && 1081 doesModifyCalleeSavedReg(I, HRI))) 1082 return true; 1083 1084 // Two control flow instructions cannot go in the same packet. 1085 if (isControlFlow(I) && isControlFlow(J)) 1086 return true; 1087 1088 // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot 1089 // contain a speculative indirect jump, 1090 // a new-value compare jump or a dealloc_return. 1091 auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { 1092 if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) 1093 return true; 1094 if (HII->isPredicated(*MI) && HII->isPredicatedNew(*MI) && HII->isJumpR(MI)) 1095 return true; 1096 return false; 1097 }; 1098 1099 if (HII->isLoopN(I) && isBadForLoopN(J)) 1100 return true; 1101 if (HII->isLoopN(J) && isBadForLoopN(I)) 1102 return true; 1103 1104 // dealloc_return cannot appear in the same packet as a conditional or 1105 // unconditional jump. 1106 return HII->isDeallocRet(I) && 1107 (J->isBranch() || J->isCall() || J->isBarrier()); 1108 } 1109 1110 bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, 1111 const MachineInstr *J) { 1112 bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); 1113 bool StoreI = I->mayStore(), StoreJ = J->mayStore(); 1114 if ((SysI && StoreJ) || (SysJ && StoreI)) 1115 return true; 1116 1117 if (StoreI && StoreJ) { 1118 if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) 1119 return true; 1120 } else { 1121 // A memop cannot be in the same packet with another memop or a store. 1122 // Two stores can be together, but here I and J cannot both be stores. 1123 bool MopStI = HII->isMemOp(I) || StoreI; 1124 bool MopStJ = HII->isMemOp(J) || StoreJ; 1125 if (MopStI && MopStJ) 1126 return true; 1127 } 1128 1129 return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); 1130 } 1131 1132 // SUI is the current instruction that is out side of the current packet. 1133 // SUJ is the current instruction inside the current packet against which that 1134 // SUI will be packetized. 1135 bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { 1136 MachineInstr *I = SUI->getInstr(); 1137 MachineInstr *J = SUJ->getInstr(); 1138 assert(I && J && "Unable to packetize null instruction!"); 1139 1140 // Clear IgnoreDepMIs when Packet starts. 1141 if (CurrentPacketMIs.size() == 1) 1142 IgnoreDepMIs.clear(); 1143 1144 MachineBasicBlock::iterator II = I; 1145 const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); 1146 1147 // Solo instructions cannot go in the packet. 1148 assert(!isSoloInstruction(*I) && "Unexpected solo instr!"); 1149 1150 if (cannotCoexist(I, J)) 1151 return false; 1152 1153 Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); 1154 if (Dependence) 1155 return false; 1156 1157 // V4 allows dual stores. It does not allow second store, if the first 1158 // store is not in SLOT0. New value store, new value jump, dealloc_return 1159 // and memop always take SLOT0. Arch spec 3.4.4.2. 1160 Dependence = hasV4SpecificDependence(I, J); 1161 if (Dependence) 1162 return false; 1163 1164 // If an instruction feeds new value jump, glue it. 1165 MachineBasicBlock::iterator NextMII = I; 1166 ++NextMII; 1167 if (NextMII != I->getParent()->end() && HII->isNewValueJump(&*NextMII)) { 1168 MachineInstr &NextMI = *NextMII; 1169 1170 bool secondRegMatch = false; 1171 const MachineOperand &NOp0 = NextMI.getOperand(0); 1172 const MachineOperand &NOp1 = NextMI.getOperand(1); 1173 1174 if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) 1175 secondRegMatch = true; 1176 1177 for (auto I : CurrentPacketMIs) { 1178 SUnit *PacketSU = MIToSUnit.find(I)->second; 1179 MachineInstr *PI = PacketSU->getInstr(); 1180 // NVJ can not be part of the dual jump - Arch Spec: section 7.8. 1181 if (PI->isCall()) { 1182 Dependence = true; 1183 break; 1184 } 1185 // Validate: 1186 // 1. Packet does not have a store in it. 1187 // 2. If the first operand of the nvj is newified, and the second 1188 // operand is also a reg, it (second reg) is not defined in 1189 // the same packet. 1190 // 3. If the second operand of the nvj is newified, (which means 1191 // first operand is also a reg), first reg is not defined in 1192 // the same packet. 1193 if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() || 1194 HII->isLoopN(PI)) { 1195 Dependence = true; 1196 break; 1197 } 1198 // Check #2/#3. 1199 const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1; 1200 if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) { 1201 Dependence = true; 1202 break; 1203 } 1204 } 1205 1206 if (Dependence) 1207 return false; 1208 GlueToNewValueJump = true; 1209 } 1210 1211 // There no dependency between a prolog instruction and its successor. 1212 if (!SUJ->isSucc(SUI)) 1213 return true; 1214 1215 for (unsigned i = 0; i < SUJ->Succs.size(); ++i) { 1216 if (FoundSequentialDependence) 1217 break; 1218 1219 if (SUJ->Succs[i].getSUnit() != SUI) 1220 continue; 1221 1222 SDep::Kind DepType = SUJ->Succs[i].getKind(); 1223 // For direct calls: 1224 // Ignore register dependences for call instructions for packetization 1225 // purposes except for those due to r31 and predicate registers. 1226 // 1227 // For indirect calls: 1228 // Same as direct calls + check for true dependences to the register 1229 // used in the indirect call. 1230 // 1231 // We completely ignore Order dependences for call instructions. 1232 // 1233 // For returns: 1234 // Ignore register dependences for return instructions like jumpr, 1235 // dealloc return unless we have dependencies on the explicit uses 1236 // of the registers used by jumpr (like r31) or dealloc return 1237 // (like r29 or r30). 1238 // 1239 // TODO: Currently, jumpr is handling only return of r31. So, the 1240 // following logic (specificaly isCallDependent) is working fine. 1241 // We need to enable jumpr for register other than r31 and then, 1242 // we need to rework the last part, where it handles indirect call 1243 // of that (isCallDependent) function. Bug 6216 is opened for this. 1244 unsigned DepReg = 0; 1245 const TargetRegisterClass *RC = nullptr; 1246 if (DepType == SDep::Data) { 1247 DepReg = SUJ->Succs[i].getReg(); 1248 RC = HRI->getMinimalPhysRegClass(DepReg); 1249 } 1250 1251 if (I->isCall() || I->isReturn() || HII->isTailCall(I)) { 1252 if (!isRegDependence(DepType)) 1253 continue; 1254 if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) 1255 continue; 1256 } 1257 1258 if (DepType == SDep::Data) { 1259 if (canPromoteToDotCur(J, SUJ, DepReg, II, RC)) 1260 if (promoteToDotCur(J, DepType, II, RC)) 1261 continue; 1262 } 1263 1264 // Data dpendence ok if we have load.cur. 1265 if (DepType == SDep::Data && HII->isDotCurInst(J)) { 1266 if (HII->isV60VectorInstruction(I)) 1267 continue; 1268 } 1269 1270 // For instructions that can be promoted to dot-new, try to promote. 1271 if (DepType == SDep::Data) { 1272 if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { 1273 if (promoteToDotNew(I, DepType, II, RC)) { 1274 PromotedToDotNew = true; 1275 continue; 1276 } 1277 } 1278 if (HII->isNewValueJump(I)) 1279 continue; 1280 } 1281 1282 // For predicated instructions, if the predicates are complements then 1283 // there can be no dependence. 1284 if (HII->isPredicated(*I) && HII->isPredicated(*J) && 1285 arePredicatesComplements(*I, *J)) { 1286 // Not always safe to do this translation. 1287 // DAG Builder attempts to reduce dependence edges using transitive 1288 // nature of dependencies. Here is an example: 1289 // 1290 // r0 = tfr_pt ... (1) 1291 // r0 = tfr_pf ... (2) 1292 // r0 = tfr_pt ... (3) 1293 // 1294 // There will be an output dependence between (1)->(2) and (2)->(3). 1295 // However, there is no dependence edge between (1)->(3). This results 1296 // in all 3 instructions going in the same packet. We ignore dependce 1297 // only once to avoid this situation. 1298 auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); 1299 if (Itr != IgnoreDepMIs.end()) { 1300 Dependence = true; 1301 return false; 1302 } 1303 IgnoreDepMIs.push_back(I); 1304 continue; 1305 } 1306 1307 // Ignore Order dependences between unconditional direct branches 1308 // and non-control-flow instructions. 1309 if (isDirectJump(I) && !J->isBranch() && !J->isCall() && 1310 DepType == SDep::Order) 1311 continue; 1312 1313 // Ignore all dependences for jumps except for true and output 1314 // dependences. 1315 if (I->isConditionalBranch() && DepType != SDep::Data && 1316 DepType != SDep::Output) 1317 continue; 1318 1319 // Ignore output dependences due to superregs. We can write to two 1320 // different subregisters of R1:0 for instance in the same cycle. 1321 1322 // If neither I nor J defines DepReg, then this is a superfluous output 1323 // dependence. The dependence must be of the form: 1324 // R0 = ... 1325 // R1 = ... 1326 // and there is an output dependence between the two instructions with 1327 // DepReg = D0. 1328 // We want to ignore these dependences. Ideally, the dependence 1329 // constructor should annotate such dependences. We can then avoid this 1330 // relatively expensive check. 1331 // 1332 if (DepType == SDep::Output) { 1333 // DepReg is the register that's responsible for the dependence. 1334 unsigned DepReg = SUJ->Succs[i].getReg(); 1335 1336 // Check if I and J really defines DepReg. 1337 if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) 1338 continue; 1339 FoundSequentialDependence = true; 1340 break; 1341 } 1342 1343 // For Order dependences: 1344 // 1. On V4 or later, volatile loads/stores can be packetized together, 1345 // unless other rules prevent is. 1346 // 2. Store followed by a load is not allowed. 1347 // 3. Store followed by a store is only valid on V4 or later. 1348 // 4. Load followed by any memory operation is allowed. 1349 if (DepType == SDep::Order) { 1350 if (!PacketizeVolatiles) { 1351 bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); 1352 if (OrdRefs) { 1353 FoundSequentialDependence = true; 1354 break; 1355 } 1356 } 1357 // J is first, I is second. 1358 bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); 1359 bool LoadI = I->mayLoad(), StoreI = I->mayStore(); 1360 if (StoreJ) { 1361 // Two stores are only allowed on V4+. Load following store is never 1362 // allowed. 1363 if (LoadI) { 1364 FoundSequentialDependence = true; 1365 break; 1366 } 1367 } else if (!LoadJ || (!LoadI && !StoreI)) { 1368 // If J is neither load nor store, assume a dependency. 1369 // If J is a load, but I is neither, also assume a dependency. 1370 FoundSequentialDependence = true; 1371 break; 1372 } 1373 // Store followed by store: not OK on V2. 1374 // Store followed by load: not OK on all. 1375 // Load followed by store: OK on all. 1376 // Load followed by load: OK on all. 1377 continue; 1378 } 1379 1380 // For V4, special case ALLOCFRAME. Even though there is dependency 1381 // between ALLOCFRAME and subsequent store, allow it to be packetized 1382 // in a same packet. This implies that the store is using the caller's 1383 // SP. Hence, offset needs to be updated accordingly. 1384 if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { 1385 unsigned Opc = I->getOpcode(); 1386 switch (Opc) { 1387 case Hexagon::S2_storerd_io: 1388 case Hexagon::S2_storeri_io: 1389 case Hexagon::S2_storerh_io: 1390 case Hexagon::S2_storerb_io: 1391 if (I->getOperand(0).getReg() == HRI->getStackRegister()) { 1392 int64_t Imm = I->getOperand(1).getImm(); 1393 int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); 1394 if (HII->isValidOffset(Opc, NewOff)) { 1395 GlueAllocframeStore = true; 1396 // Since this store is to be glued with allocframe in the same 1397 // packet, it will use SP of the previous stack frame, i.e. 1398 // caller's SP. Therefore, we need to recalculate offset 1399 // according to this change. 1400 I->getOperand(1).setImm(NewOff); 1401 continue; 1402 } 1403 } 1404 default: 1405 break; 1406 } 1407 } 1408 1409 // There are certain anti-dependencies that cannot be ignored. 1410 // Specifically: 1411 // J2_call ... %R0<imp-def> ; SUJ 1412 // R0 = ... ; SUI 1413 // Those cannot be packetized together, since the call will observe 1414 // the effect of the assignment to R0. 1415 if (DepType == SDep::Anti && J->isCall()) { 1416 // Check if I defines any volatile register. We should also check 1417 // registers that the call may read, but these happen to be a 1418 // subset of the volatile register set. 1419 for (const MCPhysReg *P = J->getDesc().ImplicitDefs; P && *P; ++P) { 1420 if (!I->modifiesRegister(*P, HRI)) 1421 continue; 1422 FoundSequentialDependence = true; 1423 break; 1424 } 1425 } 1426 1427 // Skip over remaining anti-dependences. Two instructions that are 1428 // anti-dependent can share a packet, since in most such cases all 1429 // operands are read before any modifications take place. 1430 // The exceptions are branch and call instructions, since they are 1431 // executed after all other instructions have completed (at least 1432 // conceptually). 1433 if (DepType != SDep::Anti) { 1434 FoundSequentialDependence = true; 1435 break; 1436 } 1437 } 1438 1439 if (FoundSequentialDependence) { 1440 Dependence = true; 1441 return false; 1442 } 1443 1444 return true; 1445 } 1446 1447 bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { 1448 MachineInstr *I = SUI->getInstr(); 1449 MachineInstr *J = SUJ->getInstr(); 1450 assert(I && J && "Unable to packetize null instruction!"); 1451 1452 if (cannotCoexist(I, J)) 1453 return false; 1454 1455 if (!Dependence) 1456 return true; 1457 1458 // Check if the instruction was promoted to a dot-new. If so, demote it 1459 // back into a dot-old. 1460 if (PromotedToDotNew) 1461 demoteToDotOld(I); 1462 1463 cleanUpDotCur(); 1464 // Check if the instruction (must be a store) was glued with an allocframe 1465 // instruction. If so, restore its offset to its original value, i.e. use 1466 // current SP instead of caller's SP. 1467 if (GlueAllocframeStore) { 1468 unsigned FrameSize = MF.getFrameInfo()->getStackSize(); 1469 MachineOperand &MOff = I->getOperand(1); 1470 MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); 1471 } 1472 return false; 1473 } 1474 1475 MachineBasicBlock::iterator 1476 HexagonPacketizerList::addToPacket(MachineInstr &MI) { 1477 MachineBasicBlock::iterator MII = MI; 1478 MachineBasicBlock *MBB = MI.getParent(); 1479 if (MI.isImplicitDef()) { 1480 unsigned R = MI.getOperand(0).getReg(); 1481 if (Hexagon::IntRegsRegClass.contains(R)) { 1482 MCSuperRegIterator S(R, HRI, false); 1483 MI.addOperand(MachineOperand::CreateReg(*S, true, true)); 1484 } 1485 return MII; 1486 } 1487 assert(ResourceTracker->canReserveResources(MI)); 1488 1489 bool ExtMI = HII->isExtended(&MI) || HII->isConstExtended(&MI); 1490 bool Good = true; 1491 1492 if (GlueToNewValueJump) { 1493 MachineInstr &NvjMI = *++MII; 1494 // We need to put both instructions in the same packet: MI and NvjMI. 1495 // Either of them can require a constant extender. Try to add both to 1496 // the current packet, and if that fails, end the packet and start a 1497 // new one. 1498 ResourceTracker->reserveResources(MI); 1499 if (ExtMI) 1500 Good = tryAllocateResourcesForConstExt(true); 1501 1502 bool ExtNvjMI = HII->isExtended(&NvjMI) || HII->isConstExtended(&NvjMI); 1503 if (Good) { 1504 if (ResourceTracker->canReserveResources(NvjMI)) 1505 ResourceTracker->reserveResources(NvjMI); 1506 else 1507 Good = false; 1508 } 1509 if (Good && ExtNvjMI) 1510 Good = tryAllocateResourcesForConstExt(true); 1511 1512 if (!Good) { 1513 endPacket(MBB, MI); 1514 assert(ResourceTracker->canReserveResources(MI)); 1515 ResourceTracker->reserveResources(MI); 1516 if (ExtMI) { 1517 assert(canReserveResourcesForConstExt()); 1518 tryAllocateResourcesForConstExt(true); 1519 } 1520 assert(ResourceTracker->canReserveResources(NvjMI)); 1521 ResourceTracker->reserveResources(NvjMI); 1522 if (ExtNvjMI) { 1523 assert(canReserveResourcesForConstExt()); 1524 reserveResourcesForConstExt(); 1525 } 1526 } 1527 CurrentPacketMIs.push_back(&MI); 1528 CurrentPacketMIs.push_back(&NvjMI); 1529 return MII; 1530 } 1531 1532 ResourceTracker->reserveResources(MI); 1533 if (ExtMI && !tryAllocateResourcesForConstExt(true)) { 1534 endPacket(MBB, MI); 1535 if (PromotedToDotNew) 1536 demoteToDotOld(&MI); 1537 ResourceTracker->reserveResources(MI); 1538 reserveResourcesForConstExt(); 1539 } 1540 1541 CurrentPacketMIs.push_back(&MI); 1542 return MII; 1543 } 1544 1545 void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, 1546 MachineBasicBlock::iterator MI) { 1547 OldPacketMIs = CurrentPacketMIs; 1548 VLIWPacketizerList::endPacket(MBB, MI); 1549 } 1550 1551 bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) { 1552 return !producesStall(&MI); 1553 } 1554 1555 1556 // Return true when ConsMI uses a register defined by ProdMI. 1557 static bool isDependent(const MachineInstr *ProdMI, 1558 const MachineInstr *ConsMI) { 1559 if (!ProdMI->getOperand(0).isReg()) 1560 return false; 1561 unsigned DstReg = ProdMI->getOperand(0).getReg(); 1562 1563 for (auto &Op : ConsMI->operands()) 1564 if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) 1565 // The MIs depend on each other. 1566 return true; 1567 1568 return false; 1569 } 1570 1571 // V60 forward scheduling. 1572 bool HexagonPacketizerList::producesStall(const MachineInstr *I) { 1573 // Check whether the previous packet is in a different loop. If this is the 1574 // case, there is little point in trying to avoid a stall because that would 1575 // favor the rare case (loop entry) over the common case (loop iteration). 1576 // 1577 // TODO: We should really be able to check all the incoming edges if this is 1578 // the first packet in a basic block, so we can avoid stalls from the loop 1579 // backedge. 1580 if (!OldPacketMIs.empty()) { 1581 auto *OldBB = OldPacketMIs.front()->getParent(); 1582 auto *ThisBB = I->getParent(); 1583 if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) 1584 return false; 1585 } 1586 1587 // Check for stall between two vector instructions. 1588 if (HII->isV60VectorInstruction(I)) { 1589 for (auto J : OldPacketMIs) { 1590 if (!HII->isV60VectorInstruction(J)) 1591 continue; 1592 if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) 1593 return true; 1594 } 1595 return false; 1596 } 1597 1598 // Check for stall between two scalar instructions. First, check that 1599 // there is no definition of a use in the current packet, because it 1600 // may be a candidate for .new. 1601 for (auto J : CurrentPacketMIs) 1602 if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) 1603 return false; 1604 1605 // Check for stall between I and instructions in the previous packet. 1606 if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) { 1607 for (auto J : OldPacketMIs) { 1608 if (HII->isV60VectorInstruction(J)) 1609 continue; 1610 if (!HII->isLateInstrFeedsEarlyInstr(J, I)) 1611 continue; 1612 if (isDependent(J, I) && !HII->canExecuteInBundle(J, I)) 1613 return true; 1614 } 1615 } 1616 1617 return false; 1618 } 1619 1620 1621 //===----------------------------------------------------------------------===// 1622 // Public Constructor Functions 1623 //===----------------------------------------------------------------------===// 1624 1625 FunctionPass *llvm::createHexagonPacketizer() { 1626 return new HexagonPacketizer(); 1627 } 1628