1 //===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This implements a simple VLIW packetizer using DFA. The packetizer works on 11 // machine basic blocks. For each instruction I in BB, the packetizer consults 12 // the DFA to see if machine resources are available to execute I. If so, the 13 // packetizer checks if I depends on any instruction J in the current packet. 14 // If no dependency is found, I is added to current packet and machine resource 15 // is marked as taken. If any dependency is found, a target API call is made to 16 // prune the dependence. 17 // 18 //===----------------------------------------------------------------------===// 19 #include "HexagonRegisterInfo.h" 20 #include "HexagonSubtarget.h" 21 #include "HexagonTargetMachine.h" 22 #include "HexagonVLIWPacketizer.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/CodeGen/MachineDominators.h" 25 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineLoopInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Debug.h" 32 #include <map> 33 #include <vector> 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "packets" 38 39 static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden, 40 cl::ZeroOrMore, cl::init(false), 41 cl::desc("Disable Hexagon packetizer pass")); 42 43 static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles", 44 cl::ZeroOrMore, cl::Hidden, cl::init(true), 45 cl::desc("Allow non-solo packetization of volatile memory references")); 46 47 static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false), 48 cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC")); 49 50 static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores", 51 cl::init(false), cl::Hidden, cl::ZeroOrMore, 52 cl::desc("Disable vector double new-value-stores")); 53 54 extern cl::opt<bool> ScheduleInlineAsm; 55 56 namespace llvm { 57 FunctionPass *createHexagonPacketizer(); 58 void initializeHexagonPacketizerPass(PassRegistry&); 59 } 60 61 62 namespace { 63 class HexagonPacketizer : public MachineFunctionPass { 64 public: 65 static char ID; 66 HexagonPacketizer() : MachineFunctionPass(ID) { 67 initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry()); 68 } 69 70 void getAnalysisUsage(AnalysisUsage &AU) const override { 71 AU.setPreservesCFG(); 72 AU.addRequired<AAResultsWrapperPass>(); 73 AU.addRequired<MachineBranchProbabilityInfo>(); 74 AU.addRequired<MachineDominatorTree>(); 75 AU.addRequired<MachineLoopInfo>(); 76 AU.addPreserved<MachineDominatorTree>(); 77 AU.addPreserved<MachineLoopInfo>(); 78 MachineFunctionPass::getAnalysisUsage(AU); 79 } 80 const char *getPassName() const override { 81 return "Hexagon Packetizer"; 82 } 83 bool runOnMachineFunction(MachineFunction &Fn) override; 84 85 private: 86 const HexagonInstrInfo *HII; 87 const HexagonRegisterInfo *HRI; 88 }; 89 90 char HexagonPacketizer::ID = 0; 91 } 92 93 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer", 94 false, false) 95 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 96 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) 97 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 98 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 99 INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer", 100 false, false) 101 102 103 HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF, 104 MachineLoopInfo &MLI, AliasAnalysis *AA, 105 const MachineBranchProbabilityInfo *MBPI) 106 : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) { 107 HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 108 HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 109 } 110 111 // Check if FirstI modifies a register that SecondI reads. 112 static bool hasWriteToReadDep(const MachineInstr *FirstI, 113 const MachineInstr *SecondI, const TargetRegisterInfo *TRI) { 114 for (auto &MO : FirstI->operands()) { 115 if (!MO.isReg() || !MO.isDef()) 116 continue; 117 unsigned R = MO.getReg(); 118 if (SecondI->readsRegister(R, TRI)) 119 return true; 120 } 121 return false; 122 } 123 124 125 static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI, 126 MachineBasicBlock::iterator BundleIt, bool Before) { 127 MachineBasicBlock::instr_iterator InsertPt; 128 if (Before) 129 InsertPt = BundleIt.getInstrIterator(); 130 else 131 InsertPt = std::next(BundleIt).getInstrIterator(); 132 133 MachineBasicBlock &B = *MI->getParent(); 134 // The instruction should at least be bundled with the preceding instruction 135 // (there will always be one, i.e. BUNDLE, if nothing else). 136 assert(MI->isBundledWithPred()); 137 if (MI->isBundledWithSucc()) { 138 MI->clearFlag(MachineInstr::BundledSucc); 139 MI->clearFlag(MachineInstr::BundledPred); 140 } else { 141 // If it's not bundled with the successor (i.e. it is the last one 142 // in the bundle), then we can simply unbundle it from the predecessor, 143 // which will take care of updating the predecessor's flag. 144 MI->unbundleFromPred(); 145 } 146 B.splice(InsertPt, &B, MI); 147 148 // Get the size of the bundle without asserting. 149 MachineBasicBlock::const_instr_iterator I(BundleIt); 150 MachineBasicBlock::const_instr_iterator E = B.instr_end(); 151 unsigned Size = 0; 152 for (++I; I != E && I->isBundledWithPred(); ++I) 153 ++Size; 154 155 // If there are still two or more instructions, then there is nothing 156 // else to be done. 157 if (Size > 1) 158 return BundleIt; 159 160 // Otherwise, extract the single instruction out and delete the bundle. 161 MachineBasicBlock::iterator NextIt = std::next(BundleIt); 162 MachineInstr *SingleI = BundleIt->getNextNode(); 163 SingleI->unbundleFromPred(); 164 assert(!SingleI->isBundledWithSucc()); 165 BundleIt->eraseFromParent(); 166 return NextIt; 167 } 168 169 170 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) { 171 if (DisablePacketizer) 172 return false; 173 174 HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo(); 175 HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo(); 176 auto &MLI = getAnalysis<MachineLoopInfo>(); 177 auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 178 auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>(); 179 180 if (EnableGenAllInsnClass) 181 HII->genAllInsnTimingClasses(MF); 182 183 // Instantiate the packetizer. 184 HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI); 185 186 // DFA state table should not be empty. 187 assert(Packetizer.getResourceTracker() && "Empty DFA table!"); 188 189 // 190 // Loop over all basic blocks and remove KILL pseudo-instructions 191 // These instructions confuse the dependence analysis. Consider: 192 // D0 = ... (Insn 0) 193 // R0 = KILL R0, D0 (Insn 1) 194 // R0 = ... (Insn 2) 195 // Here, Insn 1 will result in the dependence graph not emitting an output 196 // dependence between Insn 0 and Insn 2. This can lead to incorrect 197 // packetization 198 // 199 for (auto &MB : MF) { 200 auto End = MB.end(); 201 auto MI = MB.begin(); 202 while (MI != End) { 203 auto NextI = std::next(MI); 204 if (MI->isKill()) { 205 MB.erase(MI); 206 End = MB.end(); 207 } 208 MI = NextI; 209 } 210 } 211 212 // Loop over all of the basic blocks. 213 for (auto &MB : MF) { 214 auto Begin = MB.begin(), End = MB.end(); 215 while (Begin != End) { 216 // First the first non-boundary starting from the end of the last 217 // scheduling region. 218 MachineBasicBlock::iterator RB = Begin; 219 while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF)) 220 ++RB; 221 // First the first boundary starting from the beginning of the new 222 // region. 223 MachineBasicBlock::iterator RE = RB; 224 while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF)) 225 ++RE; 226 // Add the scheduling boundary if it's not block end. 227 if (RE != End) 228 ++RE; 229 // If RB == End, then RE == End. 230 if (RB != End) 231 Packetizer.PacketizeMIs(&MB, RB, RE); 232 233 Begin = RE; 234 } 235 } 236 237 Packetizer.unpacketizeSoloInstrs(MF); 238 return true; 239 } 240 241 242 // Reserve resources for a constant extender. Trigger an assertion if the 243 // reservation fails. 244 void HexagonPacketizerList::reserveResourcesForConstExt() { 245 if (!tryAllocateResourcesForConstExt(true)) 246 llvm_unreachable("Resources not available"); 247 } 248 249 bool HexagonPacketizerList::canReserveResourcesForConstExt() { 250 return tryAllocateResourcesForConstExt(false); 251 } 252 253 // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded, 254 // return true, otherwise, return false. 255 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) { 256 auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc()); 257 bool Avail = ResourceTracker->canReserveResources(ExtMI); 258 if (Reserve && Avail) 259 ResourceTracker->reserveResources(ExtMI); 260 MF.DeleteMachineInstr(ExtMI); 261 return Avail; 262 } 263 264 265 bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI, 266 SDep::Kind DepType, unsigned DepReg) { 267 // Check for LR dependence. 268 if (DepReg == HRI->getRARegister()) 269 return true; 270 271 if (HII->isDeallocRet(MI)) 272 if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister()) 273 return true; 274 275 // Check if this is a predicate dependence. 276 const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg); 277 if (RC == &Hexagon::PredRegsRegClass) 278 return true; 279 280 // Assumes that the first operand of the CALLr is the function address. 281 if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) { 282 MachineOperand MO = MI->getOperand(0); 283 if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg)) 284 return true; 285 } 286 287 return false; 288 } 289 290 static bool isRegDependence(const SDep::Kind DepType) { 291 return DepType == SDep::Data || DepType == SDep::Anti || 292 DepType == SDep::Output; 293 } 294 295 static bool isDirectJump(const MachineInstr* MI) { 296 return MI->getOpcode() == Hexagon::J2_jump; 297 } 298 299 static bool isSchedBarrier(const MachineInstr* MI) { 300 switch (MI->getOpcode()) { 301 case Hexagon::Y2_barrier: 302 return true; 303 } 304 return false; 305 } 306 307 static bool isControlFlow(const MachineInstr* MI) { 308 return (MI->getDesc().isTerminator() || MI->getDesc().isCall()); 309 } 310 311 312 /// Returns true if the instruction modifies a callee-saved register. 313 static bool doesModifyCalleeSavedReg(const MachineInstr *MI, 314 const TargetRegisterInfo *TRI) { 315 const MachineFunction &MF = *MI->getParent()->getParent(); 316 for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR) 317 if (MI->modifiesRegister(*CSR, TRI)) 318 return true; 319 return false; 320 } 321 322 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) 323 // Returns true if an instruction can be promoted to .new predicate or 324 // new-value store. 325 bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) { 326 return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI); 327 } 328 329 // Promote an instructiont to its .cur form. 330 // At this time, we have already made a call to canPromoteToDotCur and made 331 // sure that it can *indeed* be promoted. 332 bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI, 333 SDep::Kind DepType, MachineBasicBlock::iterator &MII, 334 const TargetRegisterClass* RC) { 335 assert(DepType == SDep::Data); 336 int CurOpcode = HII->getDotCurOp(MI); 337 MI->setDesc(HII->get(CurOpcode)); 338 return true; 339 } 340 341 void HexagonPacketizerList::cleanUpDotCur() { 342 MachineInstr *MI = NULL; 343 for (auto BI : CurrentPacketMIs) { 344 DEBUG(dbgs() << "Cleanup packet has "; BI->dump();); 345 if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) { 346 MI = BI; 347 continue; 348 } 349 if (MI) { 350 for (auto &MO : BI->operands()) 351 if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg()) 352 return; 353 } 354 } 355 if (!MI) 356 return; 357 // We did not find a use of the CUR, so de-cur it. 358 MI->setDesc(HII->get(Hexagon::V6_vL32b_ai)); 359 DEBUG(dbgs() << "Demoted CUR "; MI->dump();); 360 } 361 362 // Check to see if an instruction can be dot cur. 363 bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI, 364 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, 365 const TargetRegisterClass *RC) { 366 if (!HII->isV60VectorInstruction(MI)) 367 return false; 368 if (!HII->isV60VectorInstruction(MII)) 369 return false; 370 371 // Already a dot new instruction. 372 if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI)) 373 return false; 374 375 if (!HII->mayBeCurLoad(MI)) 376 return false; 377 378 // The "cur value" cannot come from inline asm. 379 if (PacketSU->getInstr()->isInlineAsm()) 380 return false; 381 382 // Make sure candidate instruction uses cur. 383 DEBUG(dbgs() << "Can we DOT Cur Vector MI\n"; 384 MI->dump(); 385 dbgs() << "in packet\n";); 386 MachineInstr *MJ = MII; 387 DEBUG(dbgs() << "Checking CUR against "; MJ->dump();); 388 unsigned DestReg = MI->getOperand(0).getReg(); 389 bool FoundMatch = false; 390 for (auto &MO : MJ->operands()) 391 if (MO.isReg() && MO.getReg() == DestReg) 392 FoundMatch = true; 393 if (!FoundMatch) 394 return false; 395 396 // Check for existing uses of a vector register within the packet which 397 // would be affected by converting a vector load into .cur formt. 398 for (auto BI : CurrentPacketMIs) { 399 DEBUG(dbgs() << "packet has "; BI->dump();); 400 if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo())) 401 return false; 402 } 403 404 DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump();); 405 // We can convert the opcode into a .cur. 406 return true; 407 } 408 409 // Promote an instruction to its .new form. At this time, we have already 410 // made a call to canPromoteToDotNew and made sure that it can *indeed* be 411 // promoted. 412 bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI, 413 SDep::Kind DepType, MachineBasicBlock::iterator &MII, 414 const TargetRegisterClass* RC) { 415 assert (DepType == SDep::Data); 416 int NewOpcode; 417 if (RC == &Hexagon::PredRegsRegClass) 418 NewOpcode = HII->getDotNewPredOp(MI, MBPI); 419 else 420 NewOpcode = HII->getDotNewOp(MI); 421 MI->setDesc(HII->get(NewOpcode)); 422 return true; 423 } 424 425 bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) { 426 int NewOpcode = HII->getDotOldOp(MI->getOpcode()); 427 MI->setDesc(HII->get(NewOpcode)); 428 return true; 429 } 430 431 enum PredicateKind { 432 PK_False, 433 PK_True, 434 PK_Unknown 435 }; 436 437 /// Returns true if an instruction is predicated on p0 and false if it's 438 /// predicated on !p0. 439 static PredicateKind getPredicateSense(const MachineInstr *MI, 440 const HexagonInstrInfo *HII) { 441 if (!HII->isPredicated(MI)) 442 return PK_Unknown; 443 if (HII->isPredicatedTrue(MI)) 444 return PK_True; 445 return PK_False; 446 } 447 448 static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI, 449 const HexagonInstrInfo *HII) { 450 assert(HII->isPostIncrement(MI) && "Not a post increment operation."); 451 #ifndef NDEBUG 452 // Post Increment means duplicates. Use dense map to find duplicates in the 453 // list. Caution: Densemap initializes with the minimum of 64 buckets, 454 // whereas there are at most 5 operands in the post increment. 455 DenseSet<unsigned> DefRegsSet; 456 for (auto &MO : MI->operands()) 457 if (MO.isReg() && MO.isDef()) 458 DefRegsSet.insert(MO.getReg()); 459 460 for (auto &MO : MI->operands()) 461 if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg())) 462 return MO; 463 #else 464 if (MI->mayLoad()) { 465 const MachineOperand &Op1 = MI->getOperand(1); 466 // The 2nd operand is always the post increment operand in load. 467 assert(Op1.isReg() && "Post increment operand has be to a register."); 468 return Op1; 469 } 470 if (MI->getDesc().mayStore()) { 471 const MachineOperand &Op0 = MI->getOperand(0); 472 // The 1st operand is always the post increment operand in store. 473 assert(Op0.isReg() && "Post increment operand has be to a register."); 474 return Op0; 475 } 476 #endif 477 // we should never come here. 478 llvm_unreachable("mayLoad or mayStore not set for Post Increment operation"); 479 } 480 481 // Get the value being stored. 482 static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) { 483 // value being stored is always the last operand. 484 return MI->getOperand(MI->getNumOperands()-1); 485 } 486 487 static bool isLoadAbsSet(const MachineInstr *MI) { 488 unsigned Opc = MI->getOpcode(); 489 switch (Opc) { 490 case Hexagon::L4_loadrd_ap: 491 case Hexagon::L4_loadrb_ap: 492 case Hexagon::L4_loadrh_ap: 493 case Hexagon::L4_loadrub_ap: 494 case Hexagon::L4_loadruh_ap: 495 case Hexagon::L4_loadri_ap: 496 return true; 497 } 498 return false; 499 } 500 501 static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) { 502 assert(isLoadAbsSet(MI)); 503 return MI->getOperand(1); 504 } 505 506 507 // Can be new value store? 508 // Following restrictions are to be respected in convert a store into 509 // a new value store. 510 // 1. If an instruction uses auto-increment, its address register cannot 511 // be a new-value register. Arch Spec 5.4.2.1 512 // 2. If an instruction uses absolute-set addressing mode, its address 513 // register cannot be a new-value register. Arch Spec 5.4.2.1. 514 // 3. If an instruction produces a 64-bit result, its registers cannot be used 515 // as new-value registers. Arch Spec 5.4.2.2. 516 // 4. If the instruction that sets the new-value register is conditional, then 517 // the instruction that uses the new-value register must also be conditional, 518 // and both must always have their predicates evaluate identically. 519 // Arch Spec 5.4.2.3. 520 // 5. There is an implied restriction that a packet cannot have another store, 521 // if there is a new value store in the packet. Corollary: if there is 522 // already a store in a packet, there can not be a new value store. 523 // Arch Spec: 3.4.4.2 524 bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI, 525 const MachineInstr *PacketMI, unsigned DepReg) { 526 // Make sure we are looking at the store, that can be promoted. 527 if (!HII->mayBeNewStore(MI)) 528 return false; 529 530 // Make sure there is dependency and can be new value'd. 531 const MachineOperand &Val = getStoreValueOperand(MI); 532 if (Val.isReg() && Val.getReg() != DepReg) 533 return false; 534 535 const MCInstrDesc& MCID = PacketMI->getDesc(); 536 537 // First operand is always the result. 538 const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF); 539 // Double regs can not feed into new value store: PRM section: 5.4.2.2. 540 if (PacketRC == &Hexagon::DoubleRegsRegClass) 541 return false; 542 543 // New-value stores are of class NV (slot 0), dual stores require class ST 544 // in slot 0 (PRM 5.5). 545 for (auto I : CurrentPacketMIs) { 546 SUnit *PacketSU = MIToSUnit.find(I)->second; 547 if (PacketSU->getInstr()->mayStore()) 548 return false; 549 } 550 551 // Make sure it's NOT the post increment register that we are going to 552 // new value. 553 if (HII->isPostIncrement(MI) && 554 getPostIncrementOperand(MI, HII).getReg() == DepReg) { 555 return false; 556 } 557 558 if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() && 559 getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) { 560 // If source is post_inc, or absolute-set addressing, it can not feed 561 // into new value store 562 // r3 = memw(r2++#4) 563 // memw(r30 + #-1404) = r2.new -> can not be new value store 564 // arch spec section: 5.4.2.1. 565 return false; 566 } 567 568 if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg) 569 return false; 570 571 // If the source that feeds the store is predicated, new value store must 572 // also be predicated. 573 if (HII->isPredicated(PacketMI)) { 574 if (!HII->isPredicated(MI)) 575 return false; 576 577 // Check to make sure that they both will have their predicates 578 // evaluate identically. 579 unsigned predRegNumSrc = 0; 580 unsigned predRegNumDst = 0; 581 const TargetRegisterClass* predRegClass = nullptr; 582 583 // Get predicate register used in the source instruction. 584 for (auto &MO : PacketMI->operands()) { 585 if (!MO.isReg()) 586 continue; 587 predRegNumSrc = MO.getReg(); 588 predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc); 589 if (predRegClass == &Hexagon::PredRegsRegClass) 590 break; 591 } 592 assert((predRegClass == &Hexagon::PredRegsRegClass) && 593 "predicate register not found in a predicated PacketMI instruction"); 594 595 // Get predicate register used in new-value store instruction. 596 for (auto &MO : MI->operands()) { 597 if (!MO.isReg()) 598 continue; 599 predRegNumDst = MO.getReg(); 600 predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst); 601 if (predRegClass == &Hexagon::PredRegsRegClass) 602 break; 603 } 604 assert((predRegClass == &Hexagon::PredRegsRegClass) && 605 "predicate register not found in a predicated MI instruction"); 606 607 // New-value register producer and user (store) need to satisfy these 608 // constraints: 609 // 1) Both instructions should be predicated on the same register. 610 // 2) If producer of the new-value register is .new predicated then store 611 // should also be .new predicated and if producer is not .new predicated 612 // then store should not be .new predicated. 613 // 3) Both new-value register producer and user should have same predicate 614 // sense, i.e, either both should be negated or both should be non-negated. 615 if (predRegNumDst != predRegNumSrc || 616 HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) || 617 getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII)) 618 return false; 619 } 620 621 // Make sure that other than the new-value register no other store instruction 622 // register has been modified in the same packet. Predicate registers can be 623 // modified by they should not be modified between the producer and the store 624 // instruction as it will make them both conditional on different values. 625 // We already know this to be true for all the instructions before and 626 // including PacketMI. Howerver, we need to perform the check for the 627 // remaining instructions in the packet. 628 629 unsigned StartCheck = 0; 630 631 for (auto I : CurrentPacketMIs) { 632 SUnit *TempSU = MIToSUnit.find(I)->second; 633 MachineInstr* TempMI = TempSU->getInstr(); 634 635 // Following condition is true for all the instructions until PacketMI is 636 // reached (StartCheck is set to 0 before the for loop). 637 // StartCheck flag is 1 for all the instructions after PacketMI. 638 if (TempMI != PacketMI && !StartCheck) // Start processing only after 639 continue; // encountering PacketMI. 640 641 StartCheck = 1; 642 if (TempMI == PacketMI) // We don't want to check PacketMI for dependence. 643 continue; 644 645 for (auto &MO : MI->operands()) 646 if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI)) 647 return false; 648 } 649 650 // Make sure that for non-POST_INC stores: 651 // 1. The only use of reg is DepReg and no other registers. 652 // This handles V4 base+index registers. 653 // The following store can not be dot new. 654 // Eg. r0 = add(r0, #3) 655 // memw(r1+r0<<#2) = r0 656 if (!HII->isPostIncrement(MI)) { 657 for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) { 658 const MachineOperand &MO = MI->getOperand(opNum); 659 if (MO.isReg() && MO.getReg() == DepReg) 660 return false; 661 } 662 } 663 664 // If data definition is because of implicit definition of the register, 665 // do not newify the store. Eg. 666 // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def> 667 // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343] 668 for (auto &MO : PacketMI->operands()) { 669 if (!MO.isReg() || !MO.isDef() || !MO.isImplicit()) 670 continue; 671 unsigned R = MO.getReg(); 672 if (R == DepReg || HRI->isSuperRegister(DepReg, R)) 673 return false; 674 } 675 676 // Handle imp-use of super reg case. There is a target independent side 677 // change that should prevent this situation but I am handling it for 678 // just-in-case. For example, we cannot newify R2 in the following case: 679 // %R3<def> = A2_tfrsi 0; 680 // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>; 681 for (auto &MO : MI->operands()) { 682 if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg) 683 return false; 684 } 685 686 // Can be dot new store. 687 return true; 688 } 689 690 // Can this MI to promoted to either new value store or new value jump. 691 bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI, 692 const SUnit *PacketSU, unsigned DepReg, 693 MachineBasicBlock::iterator &MII) { 694 if (!HII->mayBeNewStore(MI)) 695 return false; 696 697 // Check to see the store can be new value'ed. 698 MachineInstr *PacketMI = PacketSU->getInstr(); 699 if (canPromoteToNewValueStore(MI, PacketMI, DepReg)) 700 return true; 701 702 // Check to see the compare/jump can be new value'ed. 703 // This is done as a pass on its own. Don't need to check it here. 704 return false; 705 } 706 707 static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) { 708 for (auto &MO : I->operands()) 709 if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit()) 710 return true; 711 return false; 712 } 713 714 // Check to see if an instruction can be dot new 715 // There are three kinds. 716 // 1. dot new on predicate - V2/V3/V4 717 // 2. dot new on stores NV/ST - V4 718 // 3. dot new on jump NV/J - V4 -- This is generated in a pass. 719 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI, 720 const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII, 721 const TargetRegisterClass* RC) { 722 // Already a dot new instruction. 723 if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI)) 724 return false; 725 726 if (!isNewifiable(MI)) 727 return false; 728 729 const MachineInstr *PI = PacketSU->getInstr(); 730 731 // The "new value" cannot come from inline asm. 732 if (PI->isInlineAsm()) 733 return false; 734 735 // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no 736 // sense. 737 if (PI->isImplicitDef()) 738 return false; 739 740 // If dependency is trough an implicitly defined register, we should not 741 // newify the use. 742 if (isImplicitDependency(PI, DepReg)) 743 return false; 744 745 const MCInstrDesc& MCID = PI->getDesc(); 746 const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF); 747 if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass) 748 return false; 749 750 // predicate .new 751 // bug 5670: until that is fixed 752 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI) 753 if (RC == &Hexagon::PredRegsRegClass) 754 if (HII->isCondInst(MI) || MI->isReturn()) 755 return HII->predCanBeUsedAsDotNew(PI, DepReg); 756 757 if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI)) 758 return false; 759 760 // Create a dot new machine instruction to see if resources can be 761 // allocated. If not, bail out now. 762 int NewOpcode = HII->getDotNewOp(MI); 763 const MCInstrDesc &D = HII->get(NewOpcode); 764 MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc()); 765 bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI); 766 MF.DeleteMachineInstr(NewMI); 767 if (!ResourcesAvailable) 768 return false; 769 770 // New Value Store only. New Value Jump generated as a separate pass. 771 if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII)) 772 return false; 773 774 return true; 775 } 776 777 // Go through the packet instructions and search for an anti dependency between 778 // them and DepReg from MI. Consider this case: 779 // Trying to add 780 // a) %R1<def> = TFRI_cdNotPt %P3, 2 781 // to this packet: 782 // { 783 // b) %P0<def> = C2_or %P3<kill>, %P0<kill> 784 // c) %P3<def> = C2_tfrrp %R23 785 // d) %R1<def> = C2_cmovenewit %P3, 4 786 // } 787 // The P3 from a) and d) will be complements after 788 // a)'s P3 is converted to .new form 789 // Anti-dep between c) and b) is irrelevant for this case 790 bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI, 791 unsigned DepReg) { 792 SUnit *PacketSUDep = MIToSUnit.find(MI)->second; 793 794 for (auto I : CurrentPacketMIs) { 795 // We only care for dependencies to predicated instructions 796 if (!HII->isPredicated(I)) 797 continue; 798 799 // Scheduling Unit for current insn in the packet 800 SUnit *PacketSU = MIToSUnit.find(I)->second; 801 802 // Look at dependencies between current members of the packet and 803 // predicate defining instruction MI. Make sure that dependency is 804 // on the exact register we care about. 805 if (PacketSU->isSucc(PacketSUDep)) { 806 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { 807 auto &Dep = PacketSU->Succs[i]; 808 if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti && 809 Dep.getReg() == DepReg) 810 return true; 811 } 812 } 813 } 814 815 return false; 816 } 817 818 819 /// Gets the predicate register of a predicated instruction. 820 static unsigned getPredicatedRegister(MachineInstr *MI, 821 const HexagonInstrInfo *QII) { 822 /// We use the following rule: The first predicate register that is a use is 823 /// the predicate register of a predicated instruction. 824 assert(QII->isPredicated(MI) && "Must be predicated instruction"); 825 826 for (auto &Op : MI->operands()) { 827 if (Op.isReg() && Op.getReg() && Op.isUse() && 828 Hexagon::PredRegsRegClass.contains(Op.getReg())) 829 return Op.getReg(); 830 } 831 832 llvm_unreachable("Unknown instruction operand layout"); 833 return 0; 834 } 835 836 // Given two predicated instructions, this function detects whether 837 // the predicates are complements. 838 bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1, 839 MachineInstr *MI2) { 840 // If we don't know the predicate sense of the instructions bail out early, we 841 // need it later. 842 if (getPredicateSense(MI1, HII) == PK_Unknown || 843 getPredicateSense(MI2, HII) == PK_Unknown) 844 return false; 845 846 // Scheduling unit for candidate. 847 SUnit *SU = MIToSUnit[MI1]; 848 849 // One corner case deals with the following scenario: 850 // Trying to add 851 // a) %R24<def> = A2_tfrt %P0, %R25 852 // to this packet: 853 // { 854 // b) %R25<def> = A2_tfrf %P0, %R24 855 // c) %P0<def> = C2_cmpeqi %R26, 1 856 // } 857 // 858 // On general check a) and b) are complements, but presence of c) will 859 // convert a) to .new form, and then it is not a complement. 860 // We attempt to detect it by analyzing existing dependencies in the packet. 861 862 // Analyze relationships between all existing members of the packet. 863 // Look for Anti dependecy on the same predicate reg as used in the 864 // candidate. 865 for (auto I : CurrentPacketMIs) { 866 // Scheduling Unit for current insn in the packet. 867 SUnit *PacketSU = MIToSUnit.find(I)->second; 868 869 // If this instruction in the packet is succeeded by the candidate... 870 if (PacketSU->isSucc(SU)) { 871 for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) { 872 auto Dep = PacketSU->Succs[i]; 873 // The corner case exist when there is true data dependency between 874 // candidate and one of current packet members, this dep is on 875 // predicate reg, and there already exist anti dep on the same pred in 876 // the packet. 877 if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data && 878 Hexagon::PredRegsRegClass.contains(Dep.getReg())) { 879 // Here I know that I is predicate setting instruction with true 880 // data dep to candidate on the register we care about - c) in the 881 // above example. Now I need to see if there is an anti dependency 882 // from c) to any other instruction in the same packet on the pred 883 // reg of interest. 884 if (restrictingDepExistInPacket(I, Dep.getReg())) 885 return false; 886 } 887 } 888 } 889 } 890 891 // If the above case does not apply, check regular complement condition. 892 // Check that the predicate register is the same and that the predicate 893 // sense is different We also need to differentiate .old vs. .new: !p0 894 // is not complementary to p0.new. 895 unsigned PReg1 = getPredicatedRegister(MI1, HII); 896 unsigned PReg2 = getPredicatedRegister(MI2, HII); 897 return PReg1 == PReg2 && 898 Hexagon::PredRegsRegClass.contains(PReg1) && 899 Hexagon::PredRegsRegClass.contains(PReg2) && 900 getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) && 901 HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2); 902 } 903 904 // Initialize packetizer flags. 905 void HexagonPacketizerList::initPacketizerState() { 906 Dependence = false; 907 PromotedToDotNew = false; 908 GlueToNewValueJump = false; 909 GlueAllocframeStore = false; 910 FoundSequentialDependence = false; 911 } 912 913 // Ignore bundling of pseudo instructions. 914 bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI, 915 const MachineBasicBlock*) { 916 if (MI->isDebugValue()) 917 return true; 918 919 if (MI->isCFIInstruction()) 920 return false; 921 922 // We must print out inline assembly. 923 if (MI->isInlineAsm()) 924 return false; 925 926 if (MI->isImplicitDef()) 927 return false; 928 929 // We check if MI has any functional units mapped to it. If it doesn't, 930 // we ignore the instruction. 931 const MCInstrDesc& TID = MI->getDesc(); 932 auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass()); 933 unsigned FuncUnits = IS->getUnits(); 934 return !FuncUnits; 935 } 936 937 bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) { 938 if (MI->isEHLabel() || MI->isCFIInstruction()) 939 return true; 940 941 // Consider inline asm to not be a solo instruction by default. 942 // Inline asm will be put in a packet temporarily, but then it will be 943 // removed, and placed outside of the packet (before or after, depending 944 // on dependencies). This is to reduce the impact of inline asm as a 945 // "packet splitting" instruction. 946 if (MI->isInlineAsm() && !ScheduleInlineAsm) 947 return true; 948 949 // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints: 950 // trap, pause, barrier, icinva, isync, and syncht are solo instructions. 951 // They must not be grouped with other instructions in a packet. 952 if (isSchedBarrier(MI)) 953 return true; 954 955 if (HII->isSolo(MI)) 956 return true; 957 958 if (MI->getOpcode() == Hexagon::A2_nop) 959 return true; 960 961 return false; 962 } 963 964 965 // Quick check if instructions MI and MJ cannot coexist in the same packet. 966 // Limit the tests to be "one-way", e.g. "if MI->isBranch and MJ->isInlineAsm", 967 // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm". 968 // For full test call this function twice: 969 // cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI) 970 // Doing the test only one way saves the amount of code in this function, 971 // since every test would need to be repeated with the MI and MJ reversed. 972 static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ, 973 const HexagonInstrInfo &HII) { 974 const MachineFunction *MF = MI->getParent()->getParent(); 975 if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() && 976 HII.isHVXMemWithAIndirect(MI, MJ)) 977 return true; 978 979 // An inline asm cannot be together with a branch, because we may not be 980 // able to remove the asm out after packetizing (i.e. if the asm must be 981 // moved past the bundle). Similarly, two asms cannot be together to avoid 982 // complications when determining their relative order outside of a bundle. 983 if (MI->isInlineAsm()) 984 return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() || 985 MJ->isCall() || MJ->isTerminator(); 986 987 // "False" really means that the quick check failed to determine if 988 // I and J cannot coexist. 989 return false; 990 } 991 992 993 // Full, symmetric check. 994 bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI, 995 const MachineInstr *MJ) { 996 return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII); 997 } 998 999 void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) { 1000 for (auto &B : MF) { 1001 MachineBasicBlock::iterator BundleIt; 1002 MachineBasicBlock::instr_iterator NextI; 1003 for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) { 1004 NextI = std::next(I); 1005 MachineInstr *MI = &*I; 1006 if (MI->isBundle()) 1007 BundleIt = I; 1008 if (!MI->isInsideBundle()) 1009 continue; 1010 1011 // Decide on where to insert the instruction that we are pulling out. 1012 // Debug instructions always go before the bundle, but the placement of 1013 // INLINE_ASM depends on potential dependencies. By default, try to 1014 // put it before the bundle, but if the asm writes to a register that 1015 // other instructions in the bundle read, then we need to place it 1016 // after the bundle (to preserve the bundle semantics). 1017 bool InsertBeforeBundle; 1018 if (MI->isInlineAsm()) 1019 InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI); 1020 else if (MI->isDebugValue()) 1021 InsertBeforeBundle = true; 1022 else 1023 continue; 1024 1025 BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle); 1026 } 1027 } 1028 } 1029 1030 // Check if a given instruction is of class "system". 1031 static bool isSystemInstr(const MachineInstr *MI) { 1032 unsigned Opc = MI->getOpcode(); 1033 switch (Opc) { 1034 case Hexagon::Y2_barrier: 1035 case Hexagon::Y2_dcfetchbo: 1036 return true; 1037 } 1038 return false; 1039 } 1040 1041 bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I, 1042 const MachineInstr *J) { 1043 // The dependence graph may not include edges between dead definitions, 1044 // so without extra checks, we could end up packetizing two instruction 1045 // defining the same (dead) register. 1046 if (I->isCall() || J->isCall()) 1047 return false; 1048 if (HII->isPredicated(I) || HII->isPredicated(J)) 1049 return false; 1050 1051 BitVector DeadDefs(Hexagon::NUM_TARGET_REGS); 1052 for (auto &MO : I->operands()) { 1053 if (!MO.isReg() || !MO.isDef() || !MO.isDead()) 1054 continue; 1055 DeadDefs[MO.getReg()] = true; 1056 } 1057 1058 for (auto &MO : J->operands()) { 1059 if (!MO.isReg() || !MO.isDef() || !MO.isDead()) 1060 continue; 1061 unsigned R = MO.getReg(); 1062 if (R != Hexagon::USR_OVF && DeadDefs[R]) 1063 return true; 1064 } 1065 return false; 1066 } 1067 1068 bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I, 1069 const MachineInstr *J) { 1070 // A save callee-save register function call can only be in a packet 1071 // with instructions that don't write to the callee-save registers. 1072 if ((HII->isSaveCalleeSavedRegsCall(I) && 1073 doesModifyCalleeSavedReg(J, HRI)) || 1074 (HII->isSaveCalleeSavedRegsCall(J) && 1075 doesModifyCalleeSavedReg(I, HRI))) 1076 return true; 1077 1078 // Two control flow instructions cannot go in the same packet. 1079 if (isControlFlow(I) && isControlFlow(J)) 1080 return true; 1081 1082 // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot 1083 // contain a speculative indirect jump, 1084 // a new-value compare jump or a dealloc_return. 1085 auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool { 1086 if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI)) 1087 return true; 1088 if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI)) 1089 return true; 1090 return false; 1091 }; 1092 1093 if (HII->isLoopN(I) && isBadForLoopN(J)) 1094 return true; 1095 if (HII->isLoopN(J) && isBadForLoopN(I)) 1096 return true; 1097 1098 // dealloc_return cannot appear in the same packet as a conditional or 1099 // unconditional jump. 1100 return HII->isDeallocRet(I) && 1101 (J->isBranch() || J->isCall() || J->isBarrier()); 1102 } 1103 1104 bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I, 1105 const MachineInstr *J) { 1106 bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J); 1107 bool StoreI = I->mayStore(), StoreJ = J->mayStore(); 1108 if ((SysI && StoreJ) || (SysJ && StoreI)) 1109 return true; 1110 1111 if (StoreI && StoreJ) { 1112 if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I)) 1113 return true; 1114 } else { 1115 // A memop cannot be in the same packet with another memop or a store. 1116 // Two stores can be together, but here I and J cannot both be stores. 1117 bool MopStI = HII->isMemOp(I) || StoreI; 1118 bool MopStJ = HII->isMemOp(J) || StoreJ; 1119 if (MopStI && MopStJ) 1120 return true; 1121 } 1122 1123 return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J)); 1124 } 1125 1126 // SUI is the current instruction that is out side of the current packet. 1127 // SUJ is the current instruction inside the current packet against which that 1128 // SUI will be packetized. 1129 bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) { 1130 MachineInstr *I = SUI->getInstr(); 1131 MachineInstr *J = SUJ->getInstr(); 1132 assert(I && J && "Unable to packetize null instruction!"); 1133 1134 // Clear IgnoreDepMIs when Packet starts. 1135 if (CurrentPacketMIs.size() == 1) 1136 IgnoreDepMIs.clear(); 1137 1138 MachineBasicBlock::iterator II = I; 1139 const unsigned FrameSize = MF.getFrameInfo()->getStackSize(); 1140 1141 // Solo instructions cannot go in the packet. 1142 assert(!isSoloInstruction(I) && "Unexpected solo instr!"); 1143 1144 if (cannotCoexist(I, J)) 1145 return false; 1146 1147 Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J); 1148 if (Dependence) 1149 return false; 1150 1151 // V4 allows dual stores. It does not allow second store, if the first 1152 // store is not in SLOT0. New value store, new value jump, dealloc_return 1153 // and memop always take SLOT0. Arch spec 3.4.4.2. 1154 Dependence = hasV4SpecificDependence(I, J); 1155 if (Dependence) 1156 return false; 1157 1158 // If an instruction feeds new value jump, glue it. 1159 MachineBasicBlock::iterator NextMII = I; 1160 ++NextMII; 1161 if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) { 1162 MachineInstr *NextMI = NextMII; 1163 1164 bool secondRegMatch = false; 1165 const MachineOperand &NOp0 = NextMI->getOperand(0); 1166 const MachineOperand &NOp1 = NextMI->getOperand(1); 1167 1168 if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg()) 1169 secondRegMatch = true; 1170 1171 for (auto I : CurrentPacketMIs) { 1172 SUnit *PacketSU = MIToSUnit.find(I)->second; 1173 MachineInstr *PI = PacketSU->getInstr(); 1174 // NVJ can not be part of the dual jump - Arch Spec: section 7.8. 1175 if (PI->isCall()) { 1176 Dependence = true; 1177 break; 1178 } 1179 // Validate: 1180 // 1. Packet does not have a store in it. 1181 // 2. If the first operand of the nvj is newified, and the second 1182 // operand is also a reg, it (second reg) is not defined in 1183 // the same packet. 1184 // 3. If the second operand of the nvj is newified, (which means 1185 // first operand is also a reg), first reg is not defined in 1186 // the same packet. 1187 if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() || 1188 HII->isLoopN(PI)) { 1189 Dependence = true; 1190 break; 1191 } 1192 // Check #2/#3. 1193 const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1; 1194 if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) { 1195 Dependence = true; 1196 break; 1197 } 1198 } 1199 1200 if (Dependence) 1201 return false; 1202 GlueToNewValueJump = true; 1203 } 1204 1205 // There no dependency between a prolog instruction and its successor. 1206 if (!SUJ->isSucc(SUI)) 1207 return true; 1208 1209 for (unsigned i = 0; i < SUJ->Succs.size(); ++i) { 1210 if (FoundSequentialDependence) 1211 break; 1212 1213 if (SUJ->Succs[i].getSUnit() != SUI) 1214 continue; 1215 1216 SDep::Kind DepType = SUJ->Succs[i].getKind(); 1217 // For direct calls: 1218 // Ignore register dependences for call instructions for packetization 1219 // purposes except for those due to r31 and predicate registers. 1220 // 1221 // For indirect calls: 1222 // Same as direct calls + check for true dependences to the register 1223 // used in the indirect call. 1224 // 1225 // We completely ignore Order dependences for call instructions. 1226 // 1227 // For returns: 1228 // Ignore register dependences for return instructions like jumpr, 1229 // dealloc return unless we have dependencies on the explicit uses 1230 // of the registers used by jumpr (like r31) or dealloc return 1231 // (like r29 or r30). 1232 // 1233 // TODO: Currently, jumpr is handling only return of r31. So, the 1234 // following logic (specificaly isCallDependent) is working fine. 1235 // We need to enable jumpr for register other than r31 and then, 1236 // we need to rework the last part, where it handles indirect call 1237 // of that (isCallDependent) function. Bug 6216 is opened for this. 1238 unsigned DepReg = 0; 1239 const TargetRegisterClass *RC = nullptr; 1240 if (DepType == SDep::Data) { 1241 DepReg = SUJ->Succs[i].getReg(); 1242 RC = HRI->getMinimalPhysRegClass(DepReg); 1243 } 1244 1245 if (I->isCall() || I->isReturn()) { 1246 if (!isRegDependence(DepType)) 1247 continue; 1248 if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg())) 1249 continue; 1250 } 1251 1252 if (DepType == SDep::Data) { 1253 if (canPromoteToDotCur(J, SUJ, DepReg, II, RC)) 1254 if (promoteToDotCur(J, DepType, II, RC)) 1255 continue; 1256 } 1257 1258 // Data dpendence ok if we have load.cur. 1259 if (DepType == SDep::Data && HII->isDotCurInst(J)) { 1260 if (HII->isV60VectorInstruction(I)) 1261 continue; 1262 } 1263 1264 // For instructions that can be promoted to dot-new, try to promote. 1265 if (DepType == SDep::Data) { 1266 if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) { 1267 if (promoteToDotNew(I, DepType, II, RC)) { 1268 PromotedToDotNew = true; 1269 continue; 1270 } 1271 } 1272 if (HII->isNewValueJump(I)) 1273 continue; 1274 } 1275 1276 // For predicated instructions, if the predicates are complements then 1277 // there can be no dependence. 1278 if (HII->isPredicated(I) && HII->isPredicated(J) && 1279 arePredicatesComplements(I, J)) { 1280 // Not always safe to do this translation. 1281 // DAG Builder attempts to reduce dependence edges using transitive 1282 // nature of dependencies. Here is an example: 1283 // 1284 // r0 = tfr_pt ... (1) 1285 // r0 = tfr_pf ... (2) 1286 // r0 = tfr_pt ... (3) 1287 // 1288 // There will be an output dependence between (1)->(2) and (2)->(3). 1289 // However, there is no dependence edge between (1)->(3). This results 1290 // in all 3 instructions going in the same packet. We ignore dependce 1291 // only once to avoid this situation. 1292 auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J); 1293 if (Itr != IgnoreDepMIs.end()) { 1294 Dependence = true; 1295 return false; 1296 } 1297 IgnoreDepMIs.push_back(I); 1298 continue; 1299 } 1300 1301 // Ignore Order dependences between unconditional direct branches 1302 // and non-control-flow instructions. 1303 if (isDirectJump(I) && !J->isBranch() && !J->isCall() && 1304 DepType == SDep::Order) 1305 continue; 1306 1307 // Ignore all dependences for jumps except for true and output 1308 // dependences. 1309 if (I->isConditionalBranch() && DepType != SDep::Data && 1310 DepType != SDep::Output) 1311 continue; 1312 1313 // Ignore output dependences due to superregs. We can write to two 1314 // different subregisters of R1:0 for instance in the same cycle. 1315 1316 // If neither I nor J defines DepReg, then this is a superfluous output 1317 // dependence. The dependence must be of the form: 1318 // R0 = ... 1319 // R1 = ... 1320 // and there is an output dependence between the two instructions with 1321 // DepReg = D0. 1322 // We want to ignore these dependences. Ideally, the dependence 1323 // constructor should annotate such dependences. We can then avoid this 1324 // relatively expensive check. 1325 // 1326 if (DepType == SDep::Output) { 1327 // DepReg is the register that's responsible for the dependence. 1328 unsigned DepReg = SUJ->Succs[i].getReg(); 1329 1330 // Check if I and J really defines DepReg. 1331 if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg)) 1332 continue; 1333 FoundSequentialDependence = true; 1334 break; 1335 } 1336 1337 // For Order dependences: 1338 // 1. On V4 or later, volatile loads/stores can be packetized together, 1339 // unless other rules prevent is. 1340 // 2. Store followed by a load is not allowed. 1341 // 3. Store followed by a store is only valid on V4 or later. 1342 // 4. Load followed by any memory operation is allowed. 1343 if (DepType == SDep::Order) { 1344 if (!PacketizeVolatiles) { 1345 bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef(); 1346 if (OrdRefs) { 1347 FoundSequentialDependence = true; 1348 break; 1349 } 1350 } 1351 // J is first, I is second. 1352 bool LoadJ = J->mayLoad(), StoreJ = J->mayStore(); 1353 bool LoadI = I->mayLoad(), StoreI = I->mayStore(); 1354 if (StoreJ) { 1355 // Two stores are only allowed on V4+. Load following store is never 1356 // allowed. 1357 if (LoadI) { 1358 FoundSequentialDependence = true; 1359 break; 1360 } 1361 } else if (!LoadJ || (!LoadI && !StoreI)) { 1362 // If J is neither load nor store, assume a dependency. 1363 // If J is a load, but I is neither, also assume a dependency. 1364 FoundSequentialDependence = true; 1365 break; 1366 } 1367 // Store followed by store: not OK on V2. 1368 // Store followed by load: not OK on all. 1369 // Load followed by store: OK on all. 1370 // Load followed by load: OK on all. 1371 continue; 1372 } 1373 1374 // For V4, special case ALLOCFRAME. Even though there is dependency 1375 // between ALLOCFRAME and subsequent store, allow it to be packetized 1376 // in a same packet. This implies that the store is using the caller's 1377 // SP. Hence, offset needs to be updated accordingly. 1378 if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) { 1379 unsigned Opc = I->getOpcode(); 1380 switch (Opc) { 1381 case Hexagon::S2_storerd_io: 1382 case Hexagon::S2_storeri_io: 1383 case Hexagon::S2_storerh_io: 1384 case Hexagon::S2_storerb_io: 1385 if (I->getOperand(0).getReg() == HRI->getStackRegister()) { 1386 int64_t Imm = I->getOperand(1).getImm(); 1387 int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE); 1388 if (HII->isValidOffset(Opc, NewOff)) { 1389 GlueAllocframeStore = true; 1390 // Since this store is to be glued with allocframe in the same 1391 // packet, it will use SP of the previous stack frame, i.e. 1392 // caller's SP. Therefore, we need to recalculate offset 1393 // according to this change. 1394 I->getOperand(1).setImm(NewOff); 1395 continue; 1396 } 1397 } 1398 default: 1399 break; 1400 } 1401 } 1402 1403 // Skip over anti-dependences. Two instructions that are anti-dependent 1404 // can share a packet. 1405 if (DepType != SDep::Anti) { 1406 FoundSequentialDependence = true; 1407 break; 1408 } 1409 } 1410 1411 if (FoundSequentialDependence) { 1412 Dependence = true; 1413 return false; 1414 } 1415 1416 return true; 1417 } 1418 1419 bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) { 1420 MachineInstr *I = SUI->getInstr(); 1421 MachineInstr *J = SUJ->getInstr(); 1422 assert(I && J && "Unable to packetize null instruction!"); 1423 1424 if (cannotCoexist(I, J)) 1425 return false; 1426 1427 if (!Dependence) 1428 return true; 1429 1430 // Check if the instruction was promoted to a dot-new. If so, demote it 1431 // back into a dot-old. 1432 if (PromotedToDotNew) 1433 demoteToDotOld(I); 1434 1435 cleanUpDotCur(); 1436 // Check if the instruction (must be a store) was glued with an allocframe 1437 // instruction. If so, restore its offset to its original value, i.e. use 1438 // current SP instead of caller's SP. 1439 if (GlueAllocframeStore) { 1440 unsigned FrameSize = MF.getFrameInfo()->getStackSize(); 1441 MachineOperand &MOff = I->getOperand(1); 1442 MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE); 1443 } 1444 return false; 1445 } 1446 1447 1448 MachineBasicBlock::iterator 1449 HexagonPacketizerList::addToPacket(MachineInstr *MI) { 1450 MachineBasicBlock::iterator MII = MI; 1451 MachineBasicBlock *MBB = MI->getParent(); 1452 if (MI->isImplicitDef()) { 1453 unsigned R = MI->getOperand(0).getReg(); 1454 if (Hexagon::IntRegsRegClass.contains(R)) { 1455 MCSuperRegIterator S(R, HRI, false); 1456 MI->addOperand(MachineOperand::CreateReg(*S, true, true)); 1457 } 1458 return MII; 1459 } 1460 assert(ResourceTracker->canReserveResources(MI)); 1461 1462 bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI); 1463 bool Good = true; 1464 1465 if (GlueToNewValueJump) { 1466 MachineInstr *NvjMI = ++MII; 1467 // We need to put both instructions in the same packet: MI and NvjMI. 1468 // Either of them can require a constant extender. Try to add both to 1469 // the current packet, and if that fails, end the packet and start a 1470 // new one. 1471 ResourceTracker->reserveResources(MI); 1472 if (ExtMI) 1473 Good = tryAllocateResourcesForConstExt(true); 1474 1475 bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI); 1476 if (Good) { 1477 if (ResourceTracker->canReserveResources(NvjMI)) 1478 ResourceTracker->reserveResources(NvjMI); 1479 else 1480 Good = false; 1481 } 1482 if (Good && ExtNvjMI) 1483 Good = tryAllocateResourcesForConstExt(true); 1484 1485 if (!Good) { 1486 endPacket(MBB, MI); 1487 assert(ResourceTracker->canReserveResources(MI)); 1488 ResourceTracker->reserveResources(MI); 1489 if (ExtMI) { 1490 assert(canReserveResourcesForConstExt()); 1491 tryAllocateResourcesForConstExt(true); 1492 } 1493 assert(ResourceTracker->canReserveResources(NvjMI)); 1494 ResourceTracker->reserveResources(NvjMI); 1495 if (ExtNvjMI) { 1496 assert(canReserveResourcesForConstExt()); 1497 reserveResourcesForConstExt(); 1498 } 1499 } 1500 CurrentPacketMIs.push_back(MI); 1501 CurrentPacketMIs.push_back(NvjMI); 1502 return MII; 1503 } 1504 1505 ResourceTracker->reserveResources(MI); 1506 if (ExtMI && !tryAllocateResourcesForConstExt(true)) { 1507 endPacket(MBB, MI); 1508 if (PromotedToDotNew) 1509 demoteToDotOld(MI); 1510 ResourceTracker->reserveResources(MI); 1511 reserveResourcesForConstExt(); 1512 } 1513 1514 CurrentPacketMIs.push_back(MI); 1515 return MII; 1516 } 1517 1518 void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB, 1519 MachineInstr *MI) { 1520 OldPacketMIs = CurrentPacketMIs; 1521 VLIWPacketizerList::endPacket(MBB, MI); 1522 } 1523 1524 bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) { 1525 return !producesStall(MI); 1526 } 1527 1528 1529 // Return true when ConsMI uses a register defined by ProdMI. 1530 static bool isDependent(const MachineInstr *ProdMI, 1531 const MachineInstr *ConsMI) { 1532 if (!ProdMI->getOperand(0).isReg()) 1533 return false; 1534 unsigned DstReg = ProdMI->getOperand(0).getReg(); 1535 1536 for (auto &Op : ConsMI->operands()) 1537 if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg) 1538 // The MIs depend on each other. 1539 return true; 1540 1541 return false; 1542 } 1543 1544 // V60 forward scheduling. 1545 bool HexagonPacketizerList::producesStall(const MachineInstr *I) { 1546 // Check whether the previous packet is in a different loop. If this is the 1547 // case, there is little point in trying to avoid a stall because that would 1548 // favor the rare case (loop entry) over the common case (loop iteration). 1549 // 1550 // TODO: We should really be able to check all the incoming edges if this is 1551 // the first packet in a basic block, so we can avoid stalls from the loop 1552 // backedge. 1553 if (!OldPacketMIs.empty()) { 1554 auto *OldBB = OldPacketMIs.front()->getParent(); 1555 auto *ThisBB = I->getParent(); 1556 if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB)) 1557 return false; 1558 } 1559 1560 // Check for stall between two vector instructions. 1561 if (HII->isV60VectorInstruction(I)) { 1562 for (auto J : OldPacketMIs) { 1563 if (!HII->isV60VectorInstruction(J)) 1564 continue; 1565 if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I)) 1566 return true; 1567 } 1568 return false; 1569 } 1570 1571 // Check for stall between two scalar instructions. First, check that 1572 // there is no definition of a use in the current packet, because it 1573 // may be a candidate for .new. 1574 for (auto J : CurrentPacketMIs) 1575 if (!HII->isV60VectorInstruction(J) && isDependent(J, I)) 1576 return false; 1577 1578 // Check for stall between I and instructions in the previous packet. 1579 if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) { 1580 for (auto J : OldPacketMIs) { 1581 if (HII->isV60VectorInstruction(J)) 1582 continue; 1583 if (!HII->isLateInstrFeedsEarlyInstr(J, I)) 1584 continue; 1585 if (isDependent(J, I) && !HII->canExecuteInBundle(J, I)) 1586 return true; 1587 } 1588 } 1589 1590 return false; 1591 } 1592 1593 1594 //===----------------------------------------------------------------------===// 1595 // Public Constructor Functions 1596 //===----------------------------------------------------------------------===// 1597 1598 FunctionPass *llvm::createHexagonPacketizer() { 1599 return new HexagonPacketizer(); 1600 } 1601 1602