Home | History | Annotate | Download | only in Hexagon
      1 //===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This implements a simple VLIW packetizer using DFA. The packetizer works on
     11 // machine basic blocks. For each instruction I in BB, the packetizer consults
     12 // the DFA to see if machine resources are available to execute I. If so, the
     13 // packetizer checks if I depends on any instruction J in the current packet.
     14 // If no dependency is found, I is added to current packet and machine resource
     15 // is marked as taken. If any dependency is found, a target API call is made to
     16 // prune the dependence.
     17 //
     18 //===----------------------------------------------------------------------===//
     19 #include "HexagonRegisterInfo.h"
     20 #include "HexagonSubtarget.h"
     21 #include "HexagonTargetMachine.h"
     22 #include "HexagonVLIWPacketizer.h"
     23 #include "llvm/Analysis/AliasAnalysis.h"
     24 #include "llvm/CodeGen/MachineDominators.h"
     25 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
     26 #include "llvm/CodeGen/MachineFunctionPass.h"
     27 #include "llvm/CodeGen/MachineLoopInfo.h"
     28 #include "llvm/CodeGen/MachineRegisterInfo.h"
     29 #include "llvm/CodeGen/Passes.h"
     30 #include "llvm/Support/CommandLine.h"
     31 #include "llvm/Support/Debug.h"
     32 
     33 using namespace llvm;
     34 
     35 #define DEBUG_TYPE "packets"
     36 
     37 static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
     38   cl::ZeroOrMore, cl::init(false),
     39   cl::desc("Disable Hexagon packetizer pass"));
     40 
     41 static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
     42   cl::ZeroOrMore, cl::Hidden, cl::init(true),
     43   cl::desc("Allow non-solo packetization of volatile memory references"));
     44 
     45 static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
     46   cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
     47 
     48 static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
     49   cl::init(false), cl::Hidden, cl::ZeroOrMore,
     50   cl::desc("Disable vector double new-value-stores"));
     51 
     52 extern cl::opt<bool> ScheduleInlineAsm;
     53 
     54 namespace llvm {
     55   FunctionPass *createHexagonPacketizer();
     56   void initializeHexagonPacketizerPass(PassRegistry&);
     57 }
     58 
     59 
     60 namespace {
     61   class HexagonPacketizer : public MachineFunctionPass {
     62   public:
     63     static char ID;
     64     HexagonPacketizer() : MachineFunctionPass(ID) {
     65       initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
     66     }
     67 
     68     void getAnalysisUsage(AnalysisUsage &AU) const override {
     69       AU.setPreservesCFG();
     70       AU.addRequired<AAResultsWrapperPass>();
     71       AU.addRequired<MachineBranchProbabilityInfo>();
     72       AU.addRequired<MachineDominatorTree>();
     73       AU.addRequired<MachineLoopInfo>();
     74       AU.addPreserved<MachineDominatorTree>();
     75       AU.addPreserved<MachineLoopInfo>();
     76       MachineFunctionPass::getAnalysisUsage(AU);
     77     }
     78     const char *getPassName() const override {
     79       return "Hexagon Packetizer";
     80     }
     81     bool runOnMachineFunction(MachineFunction &Fn) override;
     82     MachineFunctionProperties getRequiredProperties() const override {
     83       return MachineFunctionProperties().set(
     84           MachineFunctionProperties::Property::AllVRegsAllocated);
     85     }
     86 
     87   private:
     88     const HexagonInstrInfo *HII;
     89     const HexagonRegisterInfo *HRI;
     90   };
     91 
     92   char HexagonPacketizer::ID = 0;
     93 }
     94 
     95 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
     96                       false, false)
     97 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     98 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
     99 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    100 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
    101 INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
    102                     false, false)
    103 
    104 
    105 HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
    106       MachineLoopInfo &MLI, AliasAnalysis *AA,
    107       const MachineBranchProbabilityInfo *MBPI)
    108     : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
    109   HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
    110   HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    111 }
    112 
    113 // Check if FirstI modifies a register that SecondI reads.
    114 static bool hasWriteToReadDep(const MachineInstr &FirstI,
    115                               const MachineInstr &SecondI,
    116                               const TargetRegisterInfo *TRI) {
    117   for (auto &MO : FirstI.operands()) {
    118     if (!MO.isReg() || !MO.isDef())
    119       continue;
    120     unsigned R = MO.getReg();
    121     if (SecondI.readsRegister(R, TRI))
    122       return true;
    123   }
    124   return false;
    125 }
    126 
    127 
    128 static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
    129       MachineBasicBlock::iterator BundleIt, bool Before) {
    130   MachineBasicBlock::instr_iterator InsertPt;
    131   if (Before)
    132     InsertPt = BundleIt.getInstrIterator();
    133   else
    134     InsertPt = std::next(BundleIt).getInstrIterator();
    135 
    136   MachineBasicBlock &B = *MI->getParent();
    137   // The instruction should at least be bundled with the preceding instruction
    138   // (there will always be one, i.e. BUNDLE, if nothing else).
    139   assert(MI->isBundledWithPred());
    140   if (MI->isBundledWithSucc()) {
    141     MI->clearFlag(MachineInstr::BundledSucc);
    142     MI->clearFlag(MachineInstr::BundledPred);
    143   } else {
    144     // If it's not bundled with the successor (i.e. it is the last one
    145     // in the bundle), then we can simply unbundle it from the predecessor,
    146     // which will take care of updating the predecessor's flag.
    147     MI->unbundleFromPred();
    148   }
    149   B.splice(InsertPt, &B, MI);
    150 
    151   // Get the size of the bundle without asserting.
    152   MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator();
    153   MachineBasicBlock::const_instr_iterator E = B.instr_end();
    154   unsigned Size = 0;
    155   for (++I; I != E && I->isBundledWithPred(); ++I)
    156     ++Size;
    157 
    158   // If there are still two or more instructions, then there is nothing
    159   // else to be done.
    160   if (Size > 1)
    161     return BundleIt;
    162 
    163   // Otherwise, extract the single instruction out and delete the bundle.
    164   MachineBasicBlock::iterator NextIt = std::next(BundleIt);
    165   MachineInstr *SingleI = BundleIt->getNextNode();
    166   SingleI->unbundleFromPred();
    167   assert(!SingleI->isBundledWithSucc());
    168   BundleIt->eraseFromParent();
    169   return NextIt;
    170 }
    171 
    172 
    173 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
    174   if (DisablePacketizer || skipFunction(*MF.getFunction()))
    175     return false;
    176 
    177   HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
    178   HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    179   auto &MLI = getAnalysis<MachineLoopInfo>();
    180   auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    181   auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
    182 
    183   if (EnableGenAllInsnClass)
    184     HII->genAllInsnTimingClasses(MF);
    185 
    186   // Instantiate the packetizer.
    187   HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
    188 
    189   // DFA state table should not be empty.
    190   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
    191 
    192   //
    193   // Loop over all basic blocks and remove KILL pseudo-instructions
    194   // These instructions confuse the dependence analysis. Consider:
    195   // D0 = ...   (Insn 0)
    196   // R0 = KILL R0, D0 (Insn 1)
    197   // R0 = ... (Insn 2)
    198   // Here, Insn 1 will result in the dependence graph not emitting an output
    199   // dependence between Insn 0 and Insn 2. This can lead to incorrect
    200   // packetization
    201   //
    202   for (auto &MB : MF) {
    203     auto End = MB.end();
    204     auto MI = MB.begin();
    205     while (MI != End) {
    206       auto NextI = std::next(MI);
    207       if (MI->isKill()) {
    208         MB.erase(MI);
    209         End = MB.end();
    210       }
    211       MI = NextI;
    212     }
    213   }
    214 
    215   // Loop over all of the basic blocks.
    216   for (auto &MB : MF) {
    217     auto Begin = MB.begin(), End = MB.end();
    218     while (Begin != End) {
    219       // First the first non-boundary starting from the end of the last
    220       // scheduling region.
    221       MachineBasicBlock::iterator RB = Begin;
    222       while (RB != End && HII->isSchedulingBoundary(*RB, &MB, MF))
    223         ++RB;
    224       // First the first boundary starting from the beginning of the new
    225       // region.
    226       MachineBasicBlock::iterator RE = RB;
    227       while (RE != End && !HII->isSchedulingBoundary(*RE, &MB, MF))
    228         ++RE;
    229       // Add the scheduling boundary if it's not block end.
    230       if (RE != End)
    231         ++RE;
    232       // If RB == End, then RE == End.
    233       if (RB != End)
    234         Packetizer.PacketizeMIs(&MB, RB, RE);
    235 
    236       Begin = RE;
    237     }
    238   }
    239 
    240   Packetizer.unpacketizeSoloInstrs(MF);
    241   return true;
    242 }
    243 
    244 
    245 // Reserve resources for a constant extender. Trigger an assertion if the
    246 // reservation fails.
    247 void HexagonPacketizerList::reserveResourcesForConstExt() {
    248   if (!tryAllocateResourcesForConstExt(true))
    249     llvm_unreachable("Resources not available");
    250 }
    251 
    252 bool HexagonPacketizerList::canReserveResourcesForConstExt() {
    253   return tryAllocateResourcesForConstExt(false);
    254 }
    255 
    256 // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
    257 // return true, otherwise, return false.
    258 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
    259   auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
    260   bool Avail = ResourceTracker->canReserveResources(*ExtMI);
    261   if (Reserve && Avail)
    262     ResourceTracker->reserveResources(*ExtMI);
    263   MF.DeleteMachineInstr(ExtMI);
    264   return Avail;
    265 }
    266 
    267 
    268 bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
    269       SDep::Kind DepType, unsigned DepReg) {
    270   // Check for LR dependence.
    271   if (DepReg == HRI->getRARegister())
    272     return true;
    273 
    274   if (HII->isDeallocRet(MI))
    275     if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
    276       return true;
    277 
    278   // Check if this is a predicate dependence.
    279   const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
    280   if (RC == &Hexagon::PredRegsRegClass)
    281     return true;
    282 
    283   // Assumes that the first operand of the CALLr is the function address.
    284   if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
    285     MachineOperand MO = MI->getOperand(0);
    286     if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
    287       return true;
    288   }
    289 
    290   return false;
    291 }
    292 
    293 static bool isRegDependence(const SDep::Kind DepType) {
    294   return DepType == SDep::Data || DepType == SDep::Anti ||
    295          DepType == SDep::Output;
    296 }
    297 
    298 static bool isDirectJump(const MachineInstr* MI) {
    299   return MI->getOpcode() == Hexagon::J2_jump;
    300 }
    301 
    302 static bool isSchedBarrier(const MachineInstr* MI) {
    303   switch (MI->getOpcode()) {
    304   case Hexagon::Y2_barrier:
    305     return true;
    306   }
    307   return false;
    308 }
    309 
    310 static bool isControlFlow(const MachineInstr* MI) {
    311   return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
    312 }
    313 
    314 
    315 /// Returns true if the instruction modifies a callee-saved register.
    316 static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
    317                                      const TargetRegisterInfo *TRI) {
    318   const MachineFunction &MF = *MI->getParent()->getParent();
    319   for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
    320     if (MI->modifiesRegister(*CSR, TRI))
    321       return true;
    322   return false;
    323 }
    324 
    325 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
    326 // Returns true if an instruction can be promoted to .new predicate or
    327 // new-value store.
    328 bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
    329   return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
    330 }
    331 
    332 // Promote an instructiont to its .cur form.
    333 // At this time, we have already made a call to canPromoteToDotCur and made
    334 // sure that it can *indeed* be promoted.
    335 bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
    336       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
    337       const TargetRegisterClass* RC) {
    338   assert(DepType == SDep::Data);
    339   int CurOpcode = HII->getDotCurOp(MI);
    340   MI->setDesc(HII->get(CurOpcode));
    341   return true;
    342 }
    343 
    344 void HexagonPacketizerList::cleanUpDotCur() {
    345   MachineInstr *MI = NULL;
    346   for (auto BI : CurrentPacketMIs) {
    347     DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
    348     if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
    349       MI = BI;
    350       continue;
    351     }
    352     if (MI) {
    353       for (auto &MO : BI->operands())
    354         if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
    355           return;
    356     }
    357   }
    358   if (!MI)
    359     return;
    360   // We did not find a use of the CUR, so de-cur it.
    361   MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
    362   DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
    363 }
    364 
    365 // Check to see if an instruction can be dot cur.
    366 bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
    367       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
    368       const TargetRegisterClass *RC) {
    369   if (!HII->isV60VectorInstruction(MI))
    370     return false;
    371   if (!HII->isV60VectorInstruction(&*MII))
    372     return false;
    373 
    374   // Already a dot new instruction.
    375   if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
    376     return false;
    377 
    378   if (!HII->mayBeCurLoad(MI))
    379     return false;
    380 
    381   // The "cur value" cannot come from inline asm.
    382   if (PacketSU->getInstr()->isInlineAsm())
    383     return false;
    384 
    385   // Make sure candidate instruction uses cur.
    386   DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
    387         MI->dump();
    388         dbgs() << "in packet\n";);
    389   MachineInstr &MJ = *MII;
    390   DEBUG({
    391     dbgs() << "Checking CUR against ";
    392     MJ.dump();
    393   });
    394   unsigned DestReg = MI->getOperand(0).getReg();
    395   bool FoundMatch = false;
    396   for (auto &MO : MJ.operands())
    397     if (MO.isReg() && MO.getReg() == DestReg)
    398       FoundMatch = true;
    399   if (!FoundMatch)
    400     return false;
    401 
    402   // Check for existing uses of a vector register within the packet which
    403   // would be affected by converting a vector load into .cur formt.
    404   for (auto BI : CurrentPacketMIs) {
    405     DEBUG(dbgs() << "packet has "; BI->dump(););
    406     if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
    407       return false;
    408   }
    409 
    410   DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
    411   // We can convert the opcode into a .cur.
    412   return true;
    413 }
    414 
    415 // Promote an instruction to its .new form. At this time, we have already
    416 // made a call to canPromoteToDotNew and made sure that it can *indeed* be
    417 // promoted.
    418 bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
    419       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
    420       const TargetRegisterClass* RC) {
    421   assert (DepType == SDep::Data);
    422   int NewOpcode;
    423   if (RC == &Hexagon::PredRegsRegClass)
    424     NewOpcode = HII->getDotNewPredOp(MI, MBPI);
    425   else
    426     NewOpcode = HII->getDotNewOp(MI);
    427   MI->setDesc(HII->get(NewOpcode));
    428   return true;
    429 }
    430 
    431 bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
    432   int NewOpcode = HII->getDotOldOp(MI->getOpcode());
    433   MI->setDesc(HII->get(NewOpcode));
    434   return true;
    435 }
    436 
    437 enum PredicateKind {
    438   PK_False,
    439   PK_True,
    440   PK_Unknown
    441 };
    442 
    443 /// Returns true if an instruction is predicated on p0 and false if it's
    444 /// predicated on !p0.
    445 static PredicateKind getPredicateSense(const MachineInstr &MI,
    446                                        const HexagonInstrInfo *HII) {
    447   if (!HII->isPredicated(MI))
    448     return PK_Unknown;
    449   if (HII->isPredicatedTrue(MI))
    450     return PK_True;
    451   return PK_False;
    452 }
    453 
    454 static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
    455       const HexagonInstrInfo *HII) {
    456   assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
    457 #ifndef NDEBUG
    458   // Post Increment means duplicates. Use dense map to find duplicates in the
    459   // list. Caution: Densemap initializes with the minimum of 64 buckets,
    460   // whereas there are at most 5 operands in the post increment.
    461   DenseSet<unsigned> DefRegsSet;
    462   for (auto &MO : MI->operands())
    463     if (MO.isReg() && MO.isDef())
    464       DefRegsSet.insert(MO.getReg());
    465 
    466   for (auto &MO : MI->operands())
    467     if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
    468       return MO;
    469 #else
    470   if (MI->mayLoad()) {
    471     const MachineOperand &Op1 = MI->getOperand(1);
    472     // The 2nd operand is always the post increment operand in load.
    473     assert(Op1.isReg() && "Post increment operand has be to a register.");
    474     return Op1;
    475   }
    476   if (MI->getDesc().mayStore()) {
    477     const MachineOperand &Op0 = MI->getOperand(0);
    478     // The 1st operand is always the post increment operand in store.
    479     assert(Op0.isReg() && "Post increment operand has be to a register.");
    480     return Op0;
    481   }
    482 #endif
    483   // we should never come here.
    484   llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
    485 }
    486 
    487 // Get the value being stored.
    488 static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
    489   // value being stored is always the last operand.
    490   return MI->getOperand(MI->getNumOperands()-1);
    491 }
    492 
    493 static bool isLoadAbsSet(const MachineInstr *MI) {
    494   unsigned Opc = MI->getOpcode();
    495   switch (Opc) {
    496     case Hexagon::L4_loadrd_ap:
    497     case Hexagon::L4_loadrb_ap:
    498     case Hexagon::L4_loadrh_ap:
    499     case Hexagon::L4_loadrub_ap:
    500     case Hexagon::L4_loadruh_ap:
    501     case Hexagon::L4_loadri_ap:
    502       return true;
    503   }
    504   return false;
    505 }
    506 
    507 static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
    508   assert(isLoadAbsSet(MI));
    509   return MI->getOperand(1);
    510 }
    511 
    512 
    513 // Can be new value store?
    514 // Following restrictions are to be respected in convert a store into
    515 // a new value store.
    516 // 1. If an instruction uses auto-increment, its address register cannot
    517 //    be a new-value register. Arch Spec 5.4.2.1
    518 // 2. If an instruction uses absolute-set addressing mode, its address
    519 //    register cannot be a new-value register. Arch Spec 5.4.2.1.
    520 // 3. If an instruction produces a 64-bit result, its registers cannot be used
    521 //    as new-value registers. Arch Spec 5.4.2.2.
    522 // 4. If the instruction that sets the new-value register is conditional, then
    523 //    the instruction that uses the new-value register must also be conditional,
    524 //    and both must always have their predicates evaluate identically.
    525 //    Arch Spec 5.4.2.3.
    526 // 5. There is an implied restriction that a packet cannot have another store,
    527 //    if there is a new value store in the packet. Corollary: if there is
    528 //    already a store in a packet, there can not be a new value store.
    529 //    Arch Spec: 3.4.4.2
    530 bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
    531       const MachineInstr *PacketMI, unsigned DepReg) {
    532   // Make sure we are looking at the store, that can be promoted.
    533   if (!HII->mayBeNewStore(MI))
    534     return false;
    535 
    536   // Make sure there is dependency and can be new value'd.
    537   const MachineOperand &Val = getStoreValueOperand(MI);
    538   if (Val.isReg() && Val.getReg() != DepReg)
    539     return false;
    540 
    541   const MCInstrDesc& MCID = PacketMI->getDesc();
    542 
    543   // First operand is always the result.
    544   const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
    545   // Double regs can not feed into new value store: PRM section: 5.4.2.2.
    546   if (PacketRC == &Hexagon::DoubleRegsRegClass)
    547     return false;
    548 
    549   // New-value stores are of class NV (slot 0), dual stores require class ST
    550   // in slot 0 (PRM 5.5).
    551   for (auto I : CurrentPacketMIs) {
    552     SUnit *PacketSU = MIToSUnit.find(I)->second;
    553     if (PacketSU->getInstr()->mayStore())
    554       return false;
    555   }
    556 
    557   // Make sure it's NOT the post increment register that we are going to
    558   // new value.
    559   if (HII->isPostIncrement(MI) &&
    560       getPostIncrementOperand(MI, HII).getReg() == DepReg) {
    561     return false;
    562   }
    563 
    564   if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
    565       getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
    566     // If source is post_inc, or absolute-set addressing, it can not feed
    567     // into new value store
    568     //   r3 = memw(r2++#4)
    569     //   memw(r30 + #-1404) = r2.new -> can not be new value store
    570     // arch spec section: 5.4.2.1.
    571     return false;
    572   }
    573 
    574   if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
    575     return false;
    576 
    577   // If the source that feeds the store is predicated, new value store must
    578   // also be predicated.
    579   if (HII->isPredicated(*PacketMI)) {
    580     if (!HII->isPredicated(*MI))
    581       return false;
    582 
    583     // Check to make sure that they both will have their predicates
    584     // evaluate identically.
    585     unsigned predRegNumSrc = 0;
    586     unsigned predRegNumDst = 0;
    587     const TargetRegisterClass* predRegClass = nullptr;
    588 
    589     // Get predicate register used in the source instruction.
    590     for (auto &MO : PacketMI->operands()) {
    591       if (!MO.isReg())
    592         continue;
    593       predRegNumSrc = MO.getReg();
    594       predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
    595       if (predRegClass == &Hexagon::PredRegsRegClass)
    596         break;
    597     }
    598     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
    599         "predicate register not found in a predicated PacketMI instruction");
    600 
    601     // Get predicate register used in new-value store instruction.
    602     for (auto &MO : MI->operands()) {
    603       if (!MO.isReg())
    604         continue;
    605       predRegNumDst = MO.getReg();
    606       predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
    607       if (predRegClass == &Hexagon::PredRegsRegClass)
    608         break;
    609     }
    610     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
    611            "predicate register not found in a predicated MI instruction");
    612 
    613     // New-value register producer and user (store) need to satisfy these
    614     // constraints:
    615     // 1) Both instructions should be predicated on the same register.
    616     // 2) If producer of the new-value register is .new predicated then store
    617     // should also be .new predicated and if producer is not .new predicated
    618     // then store should not be .new predicated.
    619     // 3) Both new-value register producer and user should have same predicate
    620     // sense, i.e, either both should be negated or both should be non-negated.
    621     if (predRegNumDst != predRegNumSrc ||
    622         HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
    623         getPredicateSense(*MI, HII) != getPredicateSense(*PacketMI, HII))
    624       return false;
    625   }
    626 
    627   // Make sure that other than the new-value register no other store instruction
    628   // register has been modified in the same packet. Predicate registers can be
    629   // modified by they should not be modified between the producer and the store
    630   // instruction as it will make them both conditional on different values.
    631   // We already know this to be true for all the instructions before and
    632   // including PacketMI. Howerver, we need to perform the check for the
    633   // remaining instructions in the packet.
    634 
    635   unsigned StartCheck = 0;
    636 
    637   for (auto I : CurrentPacketMIs) {
    638     SUnit *TempSU = MIToSUnit.find(I)->second;
    639     MachineInstr* TempMI = TempSU->getInstr();
    640 
    641     // Following condition is true for all the instructions until PacketMI is
    642     // reached (StartCheck is set to 0 before the for loop).
    643     // StartCheck flag is 1 for all the instructions after PacketMI.
    644     if (TempMI != PacketMI && !StartCheck) // Start processing only after
    645       continue;                            // encountering PacketMI.
    646 
    647     StartCheck = 1;
    648     if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
    649       continue;
    650 
    651     for (auto &MO : MI->operands())
    652       if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
    653         return false;
    654   }
    655 
    656   // Make sure that for non-POST_INC stores:
    657   // 1. The only use of reg is DepReg and no other registers.
    658   //    This handles V4 base+index registers.
    659   //    The following store can not be dot new.
    660   //    Eg.   r0 = add(r0, #3)
    661   //          memw(r1+r0<<#2) = r0
    662   if (!HII->isPostIncrement(MI)) {
    663     for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
    664       const MachineOperand &MO = MI->getOperand(opNum);
    665       if (MO.isReg() && MO.getReg() == DepReg)
    666         return false;
    667     }
    668   }
    669 
    670   // If data definition is because of implicit definition of the register,
    671   // do not newify the store. Eg.
    672   // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
    673   // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
    674   for (auto &MO : PacketMI->operands()) {
    675     if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
    676       continue;
    677     unsigned R = MO.getReg();
    678     if (R == DepReg || HRI->isSuperRegister(DepReg, R))
    679       return false;
    680   }
    681 
    682   // Handle imp-use of super reg case. There is a target independent side
    683   // change that should prevent this situation but I am handling it for
    684   // just-in-case. For example, we cannot newify R2 in the following case:
    685   // %R3<def> = A2_tfrsi 0;
    686   // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
    687   for (auto &MO : MI->operands()) {
    688     if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
    689       return false;
    690   }
    691 
    692   // Can be dot new store.
    693   return true;
    694 }
    695 
    696 // Can this MI to promoted to either new value store or new value jump.
    697 bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
    698       const SUnit *PacketSU, unsigned DepReg,
    699       MachineBasicBlock::iterator &MII) {
    700   if (!HII->mayBeNewStore(MI))
    701     return false;
    702 
    703   // Check to see the store can be new value'ed.
    704   MachineInstr *PacketMI = PacketSU->getInstr();
    705   if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
    706     return true;
    707 
    708   // Check to see the compare/jump can be new value'ed.
    709   // This is done as a pass on its own. Don't need to check it here.
    710   return false;
    711 }
    712 
    713 static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
    714   for (auto &MO : I->operands())
    715     if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
    716       return true;
    717   return false;
    718 }
    719 
    720 // Check to see if an instruction can be dot new
    721 // There are three kinds.
    722 // 1. dot new on predicate - V2/V3/V4
    723 // 2. dot new on stores NV/ST - V4
    724 // 3. dot new on jump NV/J - V4 -- This is generated in a pass.
    725 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
    726       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
    727       const TargetRegisterClass* RC) {
    728   // Already a dot new instruction.
    729   if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
    730     return false;
    731 
    732   if (!isNewifiable(MI))
    733     return false;
    734 
    735   const MachineInstr *PI = PacketSU->getInstr();
    736 
    737   // The "new value" cannot come from inline asm.
    738   if (PI->isInlineAsm())
    739     return false;
    740 
    741   // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
    742   // sense.
    743   if (PI->isImplicitDef())
    744     return false;
    745 
    746   // If dependency is trough an implicitly defined register, we should not
    747   // newify the use.
    748   if (isImplicitDependency(PI, DepReg))
    749     return false;
    750 
    751   const MCInstrDesc& MCID = PI->getDesc();
    752   const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
    753   if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
    754     return false;
    755 
    756   // predicate .new
    757   // bug 5670: until that is fixed
    758   // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
    759   if (RC == &Hexagon::PredRegsRegClass)
    760     if (HII->isCondInst(MI) || MI->isReturn())
    761       return HII->predCanBeUsedAsDotNew(PI, DepReg);
    762 
    763   if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
    764     return false;
    765 
    766   // Create a dot new machine instruction to see if resources can be
    767   // allocated. If not, bail out now.
    768   int NewOpcode = HII->getDotNewOp(MI);
    769   const MCInstrDesc &D = HII->get(NewOpcode);
    770   MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
    771   bool ResourcesAvailable = ResourceTracker->canReserveResources(*NewMI);
    772   MF.DeleteMachineInstr(NewMI);
    773   if (!ResourcesAvailable)
    774     return false;
    775 
    776   // New Value Store only. New Value Jump generated as a separate pass.
    777   if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
    778     return false;
    779 
    780   return true;
    781 }
    782 
    783 // Go through the packet instructions and search for an anti dependency between
    784 // them and DepReg from MI. Consider this case:
    785 // Trying to add
    786 // a) %R1<def> = TFRI_cdNotPt %P3, 2
    787 // to this packet:
    788 // {
    789 //   b) %P0<def> = C2_or %P3<kill>, %P0<kill>
    790 //   c) %P3<def> = C2_tfrrp %R23
    791 //   d) %R1<def> = C2_cmovenewit %P3, 4
    792 //  }
    793 // The P3 from a) and d) will be complements after
    794 // a)'s P3 is converted to .new form
    795 // Anti-dep between c) and b) is irrelevant for this case
    796 bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
    797                                                         unsigned DepReg) {
    798   SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
    799 
    800   for (auto I : CurrentPacketMIs) {
    801     // We only care for dependencies to predicated instructions
    802     if (!HII->isPredicated(*I))
    803       continue;
    804 
    805     // Scheduling Unit for current insn in the packet
    806     SUnit *PacketSU = MIToSUnit.find(I)->second;
    807 
    808     // Look at dependencies between current members of the packet and
    809     // predicate defining instruction MI. Make sure that dependency is
    810     // on the exact register we care about.
    811     if (PacketSU->isSucc(PacketSUDep)) {
    812       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
    813         auto &Dep = PacketSU->Succs[i];
    814         if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
    815             Dep.getReg() == DepReg)
    816           return true;
    817       }
    818     }
    819   }
    820 
    821   return false;
    822 }
    823 
    824 
    825 /// Gets the predicate register of a predicated instruction.
    826 static unsigned getPredicatedRegister(MachineInstr &MI,
    827                                       const HexagonInstrInfo *QII) {
    828   /// We use the following rule: The first predicate register that is a use is
    829   /// the predicate register of a predicated instruction.
    830   assert(QII->isPredicated(MI) && "Must be predicated instruction");
    831 
    832   for (auto &Op : MI.operands()) {
    833     if (Op.isReg() && Op.getReg() && Op.isUse() &&
    834         Hexagon::PredRegsRegClass.contains(Op.getReg()))
    835       return Op.getReg();
    836   }
    837 
    838   llvm_unreachable("Unknown instruction operand layout");
    839   return 0;
    840 }
    841 
    842 // Given two predicated instructions, this function detects whether
    843 // the predicates are complements.
    844 bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1,
    845                                                      MachineInstr &MI2) {
    846   // If we don't know the predicate sense of the instructions bail out early, we
    847   // need it later.
    848   if (getPredicateSense(MI1, HII) == PK_Unknown ||
    849       getPredicateSense(MI2, HII) == PK_Unknown)
    850     return false;
    851 
    852   // Scheduling unit for candidate.
    853   SUnit *SU = MIToSUnit[&MI1];
    854 
    855   // One corner case deals with the following scenario:
    856   // Trying to add
    857   // a) %R24<def> = A2_tfrt %P0, %R25
    858   // to this packet:
    859   // {
    860   //   b) %R25<def> = A2_tfrf %P0, %R24
    861   //   c) %P0<def> = C2_cmpeqi %R26, 1
    862   // }
    863   //
    864   // On general check a) and b) are complements, but presence of c) will
    865   // convert a) to .new form, and then it is not a complement.
    866   // We attempt to detect it by analyzing existing dependencies in the packet.
    867 
    868   // Analyze relationships between all existing members of the packet.
    869   // Look for Anti dependecy on the same predicate reg as used in the
    870   // candidate.
    871   for (auto I : CurrentPacketMIs) {
    872     // Scheduling Unit for current insn in the packet.
    873     SUnit *PacketSU = MIToSUnit.find(I)->second;
    874 
    875     // If this instruction in the packet is succeeded by the candidate...
    876     if (PacketSU->isSucc(SU)) {
    877       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
    878         auto Dep = PacketSU->Succs[i];
    879         // The corner case exist when there is true data dependency between
    880         // candidate and one of current packet members, this dep is on
    881         // predicate reg, and there already exist anti dep on the same pred in
    882         // the packet.
    883         if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
    884             Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
    885           // Here I know that I is predicate setting instruction with true
    886           // data dep to candidate on the register we care about - c) in the
    887           // above example. Now I need to see if there is an anti dependency
    888           // from c) to any other instruction in the same packet on the pred
    889           // reg of interest.
    890           if (restrictingDepExistInPacket(I, Dep.getReg()))
    891             return false;
    892         }
    893       }
    894     }
    895   }
    896 
    897   // If the above case does not apply, check regular complement condition.
    898   // Check that the predicate register is the same and that the predicate
    899   // sense is different We also need to differentiate .old vs. .new: !p0
    900   // is not complementary to p0.new.
    901   unsigned PReg1 = getPredicatedRegister(MI1, HII);
    902   unsigned PReg2 = getPredicatedRegister(MI2, HII);
    903   return PReg1 == PReg2 &&
    904          Hexagon::PredRegsRegClass.contains(PReg1) &&
    905          Hexagon::PredRegsRegClass.contains(PReg2) &&
    906          getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
    907          HII->isDotNewInst(&MI1) == HII->isDotNewInst(&MI2);
    908 }
    909 
    910 // Initialize packetizer flags.
    911 void HexagonPacketizerList::initPacketizerState() {
    912   Dependence = false;
    913   PromotedToDotNew = false;
    914   GlueToNewValueJump = false;
    915   GlueAllocframeStore = false;
    916   FoundSequentialDependence = false;
    917 }
    918 
    919 // Ignore bundling of pseudo instructions.
    920 bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr &MI,
    921                                                     const MachineBasicBlock *) {
    922   if (MI.isDebugValue())
    923     return true;
    924 
    925   if (MI.isCFIInstruction())
    926     return false;
    927 
    928   // We must print out inline assembly.
    929   if (MI.isInlineAsm())
    930     return false;
    931 
    932   if (MI.isImplicitDef())
    933     return false;
    934 
    935   // We check if MI has any functional units mapped to it. If it doesn't,
    936   // we ignore the instruction.
    937   const MCInstrDesc& TID = MI.getDesc();
    938   auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
    939   unsigned FuncUnits = IS->getUnits();
    940   return !FuncUnits;
    941 }
    942 
    943 bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
    944   if (MI.isEHLabel() || MI.isCFIInstruction())
    945     return true;
    946 
    947   // Consider inline asm to not be a solo instruction by default.
    948   // Inline asm will be put in a packet temporarily, but then it will be
    949   // removed, and placed outside of the packet (before or after, depending
    950   // on dependencies).  This is to reduce the impact of inline asm as a
    951   // "packet splitting" instruction.
    952   if (MI.isInlineAsm() && !ScheduleInlineAsm)
    953     return true;
    954 
    955   // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
    956   // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
    957   // They must not be grouped with other instructions in a packet.
    958   if (isSchedBarrier(&MI))
    959     return true;
    960 
    961   if (HII->isSolo(&MI))
    962     return true;
    963 
    964   if (MI.getOpcode() == Hexagon::A2_nop)
    965     return true;
    966 
    967   return false;
    968 }
    969 
    970 
    971 // Quick check if instructions MI and MJ cannot coexist in the same packet.
    972 // Limit the tests to be "one-way", e.g.  "if MI->isBranch and MJ->isInlineAsm",
    973 // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
    974 // For full test call this function twice:
    975 //   cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
    976 // Doing the test only one way saves the amount of code in this function,
    977 // since every test would need to be repeated with the MI and MJ reversed.
    978 static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
    979       const HexagonInstrInfo &HII) {
    980   const MachineFunction *MF = MI->getParent()->getParent();
    981   if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
    982       HII.isHVXMemWithAIndirect(MI, MJ))
    983     return true;
    984 
    985   // An inline asm cannot be together with a branch, because we may not be
    986   // able to remove the asm out after packetizing (i.e. if the asm must be
    987   // moved past the bundle).  Similarly, two asms cannot be together to avoid
    988   // complications when determining their relative order outside of a bundle.
    989   if (MI->isInlineAsm())
    990     return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
    991            MJ->isCall() || MJ->isTerminator();
    992 
    993   // "False" really means that the quick check failed to determine if
    994   // I and J cannot coexist.
    995   return false;
    996 }
    997 
    998 
    999 // Full, symmetric check.
   1000 bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
   1001       const MachineInstr *MJ) {
   1002   return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
   1003 }
   1004 
   1005 void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
   1006   for (auto &B : MF) {
   1007     MachineBasicBlock::iterator BundleIt;
   1008     MachineBasicBlock::instr_iterator NextI;
   1009     for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
   1010       NextI = std::next(I);
   1011       MachineInstr *MI = &*I;
   1012       if (MI->isBundle())
   1013         BundleIt = I;
   1014       if (!MI->isInsideBundle())
   1015         continue;
   1016 
   1017       // Decide on where to insert the instruction that we are pulling out.
   1018       // Debug instructions always go before the bundle, but the placement of
   1019       // INLINE_ASM depends on potential dependencies.  By default, try to
   1020       // put it before the bundle, but if the asm writes to a register that
   1021       // other instructions in the bundle read, then we need to place it
   1022       // after the bundle (to preserve the bundle semantics).
   1023       bool InsertBeforeBundle;
   1024       if (MI->isInlineAsm())
   1025         InsertBeforeBundle = !hasWriteToReadDep(*MI, *BundleIt, HRI);
   1026       else if (MI->isDebugValue())
   1027         InsertBeforeBundle = true;
   1028       else
   1029         continue;
   1030 
   1031       BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
   1032     }
   1033   }
   1034 }
   1035 
   1036 // Check if a given instruction is of class "system".
   1037 static bool isSystemInstr(const MachineInstr *MI) {
   1038   unsigned Opc = MI->getOpcode();
   1039   switch (Opc) {
   1040     case Hexagon::Y2_barrier:
   1041     case Hexagon::Y2_dcfetchbo:
   1042       return true;
   1043   }
   1044   return false;
   1045 }
   1046 
   1047 bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
   1048                                               const MachineInstr *J) {
   1049   // The dependence graph may not include edges between dead definitions,
   1050   // so without extra checks, we could end up packetizing two instruction
   1051   // defining the same (dead) register.
   1052   if (I->isCall() || J->isCall())
   1053     return false;
   1054   if (HII->isPredicated(*I) || HII->isPredicated(*J))
   1055     return false;
   1056 
   1057   BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
   1058   for (auto &MO : I->operands()) {
   1059     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
   1060       continue;
   1061     DeadDefs[MO.getReg()] = true;
   1062   }
   1063 
   1064   for (auto &MO : J->operands()) {
   1065     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
   1066       continue;
   1067     unsigned R = MO.getReg();
   1068     if (R != Hexagon::USR_OVF && DeadDefs[R])
   1069       return true;
   1070   }
   1071   return false;
   1072 }
   1073 
   1074 bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
   1075                                                  const MachineInstr *J) {
   1076   // A save callee-save register function call can only be in a packet
   1077   // with instructions that don't write to the callee-save registers.
   1078   if ((HII->isSaveCalleeSavedRegsCall(I) &&
   1079        doesModifyCalleeSavedReg(J, HRI)) ||
   1080       (HII->isSaveCalleeSavedRegsCall(J) &&
   1081        doesModifyCalleeSavedReg(I, HRI)))
   1082     return true;
   1083 
   1084   // Two control flow instructions cannot go in the same packet.
   1085   if (isControlFlow(I) && isControlFlow(J))
   1086     return true;
   1087 
   1088   // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
   1089   // contain a speculative indirect jump,
   1090   // a new-value compare jump or a dealloc_return.
   1091   auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
   1092     if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
   1093       return true;
   1094     if (HII->isPredicated(*MI) && HII->isPredicatedNew(*MI) && HII->isJumpR(MI))
   1095       return true;
   1096     return false;
   1097   };
   1098 
   1099   if (HII->isLoopN(I) && isBadForLoopN(J))
   1100     return true;
   1101   if (HII->isLoopN(J) && isBadForLoopN(I))
   1102     return true;
   1103 
   1104   // dealloc_return cannot appear in the same packet as a conditional or
   1105   // unconditional jump.
   1106   return HII->isDeallocRet(I) &&
   1107          (J->isBranch() || J->isCall() || J->isBarrier());
   1108 }
   1109 
   1110 bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
   1111                                                     const MachineInstr *J) {
   1112   bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
   1113   bool StoreI = I->mayStore(), StoreJ = J->mayStore();
   1114   if ((SysI && StoreJ) || (SysJ && StoreI))
   1115     return true;
   1116 
   1117   if (StoreI && StoreJ) {
   1118     if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
   1119       return true;
   1120   } else {
   1121     // A memop cannot be in the same packet with another memop or a store.
   1122     // Two stores can be together, but here I and J cannot both be stores.
   1123     bool MopStI = HII->isMemOp(I) || StoreI;
   1124     bool MopStJ = HII->isMemOp(J) || StoreJ;
   1125     if (MopStI && MopStJ)
   1126       return true;
   1127   }
   1128 
   1129   return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
   1130 }
   1131 
   1132 // SUI is the current instruction that is out side of the current packet.
   1133 // SUJ is the current instruction inside the current packet against which that
   1134 // SUI will be packetized.
   1135 bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
   1136   MachineInstr *I = SUI->getInstr();
   1137   MachineInstr *J = SUJ->getInstr();
   1138   assert(I && J && "Unable to packetize null instruction!");
   1139 
   1140   // Clear IgnoreDepMIs when Packet starts.
   1141   if (CurrentPacketMIs.size() == 1)
   1142     IgnoreDepMIs.clear();
   1143 
   1144   MachineBasicBlock::iterator II = I;
   1145   const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
   1146 
   1147   // Solo instructions cannot go in the packet.
   1148   assert(!isSoloInstruction(*I) && "Unexpected solo instr!");
   1149 
   1150   if (cannotCoexist(I, J))
   1151     return false;
   1152 
   1153   Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
   1154   if (Dependence)
   1155     return false;
   1156 
   1157   // V4 allows dual stores. It does not allow second store, if the first
   1158   // store is not in SLOT0. New value store, new value jump, dealloc_return
   1159   // and memop always take SLOT0. Arch spec 3.4.4.2.
   1160   Dependence = hasV4SpecificDependence(I, J);
   1161   if (Dependence)
   1162     return false;
   1163 
   1164   // If an instruction feeds new value jump, glue it.
   1165   MachineBasicBlock::iterator NextMII = I;
   1166   ++NextMII;
   1167   if (NextMII != I->getParent()->end() && HII->isNewValueJump(&*NextMII)) {
   1168     MachineInstr &NextMI = *NextMII;
   1169 
   1170     bool secondRegMatch = false;
   1171     const MachineOperand &NOp0 = NextMI.getOperand(0);
   1172     const MachineOperand &NOp1 = NextMI.getOperand(1);
   1173 
   1174     if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
   1175       secondRegMatch = true;
   1176 
   1177     for (auto I : CurrentPacketMIs) {
   1178       SUnit *PacketSU = MIToSUnit.find(I)->second;
   1179       MachineInstr *PI = PacketSU->getInstr();
   1180       // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
   1181       if (PI->isCall()) {
   1182         Dependence = true;
   1183         break;
   1184       }
   1185       // Validate:
   1186       // 1. Packet does not have a store in it.
   1187       // 2. If the first operand of the nvj is newified, and the second
   1188       //    operand is also a reg, it (second reg) is not defined in
   1189       //    the same packet.
   1190       // 3. If the second operand of the nvj is newified, (which means
   1191       //    first operand is also a reg), first reg is not defined in
   1192       //    the same packet.
   1193       if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
   1194           HII->isLoopN(PI)) {
   1195         Dependence = true;
   1196         break;
   1197       }
   1198       // Check #2/#3.
   1199       const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
   1200       if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
   1201         Dependence = true;
   1202         break;
   1203       }
   1204     }
   1205 
   1206     if (Dependence)
   1207       return false;
   1208     GlueToNewValueJump = true;
   1209   }
   1210 
   1211   // There no dependency between a prolog instruction and its successor.
   1212   if (!SUJ->isSucc(SUI))
   1213     return true;
   1214 
   1215   for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
   1216     if (FoundSequentialDependence)
   1217       break;
   1218 
   1219     if (SUJ->Succs[i].getSUnit() != SUI)
   1220       continue;
   1221 
   1222     SDep::Kind DepType = SUJ->Succs[i].getKind();
   1223     // For direct calls:
   1224     // Ignore register dependences for call instructions for packetization
   1225     // purposes except for those due to r31 and predicate registers.
   1226     //
   1227     // For indirect calls:
   1228     // Same as direct calls + check for true dependences to the register
   1229     // used in the indirect call.
   1230     //
   1231     // We completely ignore Order dependences for call instructions.
   1232     //
   1233     // For returns:
   1234     // Ignore register dependences for return instructions like jumpr,
   1235     // dealloc return unless we have dependencies on the explicit uses
   1236     // of the registers used by jumpr (like r31) or dealloc return
   1237     // (like r29 or r30).
   1238     //
   1239     // TODO: Currently, jumpr is handling only return of r31. So, the
   1240     // following logic (specificaly isCallDependent) is working fine.
   1241     // We need to enable jumpr for register other than r31 and then,
   1242     // we need to rework the last part, where it handles indirect call
   1243     // of that (isCallDependent) function. Bug 6216 is opened for this.
   1244     unsigned DepReg = 0;
   1245     const TargetRegisterClass *RC = nullptr;
   1246     if (DepType == SDep::Data) {
   1247       DepReg = SUJ->Succs[i].getReg();
   1248       RC = HRI->getMinimalPhysRegClass(DepReg);
   1249     }
   1250 
   1251     if (I->isCall() || I->isReturn() || HII->isTailCall(I)) {
   1252       if (!isRegDependence(DepType))
   1253         continue;
   1254       if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
   1255         continue;
   1256     }
   1257 
   1258     if (DepType == SDep::Data) {
   1259       if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
   1260         if (promoteToDotCur(J, DepType, II, RC))
   1261           continue;
   1262     }
   1263 
   1264     // Data dpendence ok if we have load.cur.
   1265     if (DepType == SDep::Data && HII->isDotCurInst(J)) {
   1266       if (HII->isV60VectorInstruction(I))
   1267         continue;
   1268     }
   1269 
   1270     // For instructions that can be promoted to dot-new, try to promote.
   1271     if (DepType == SDep::Data) {
   1272       if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
   1273         if (promoteToDotNew(I, DepType, II, RC)) {
   1274           PromotedToDotNew = true;
   1275           continue;
   1276         }
   1277       }
   1278       if (HII->isNewValueJump(I))
   1279         continue;
   1280     }
   1281 
   1282     // For predicated instructions, if the predicates are complements then
   1283     // there can be no dependence.
   1284     if (HII->isPredicated(*I) && HII->isPredicated(*J) &&
   1285         arePredicatesComplements(*I, *J)) {
   1286       // Not always safe to do this translation.
   1287       // DAG Builder attempts to reduce dependence edges using transitive
   1288       // nature of dependencies. Here is an example:
   1289       //
   1290       // r0 = tfr_pt ... (1)
   1291       // r0 = tfr_pf ... (2)
   1292       // r0 = tfr_pt ... (3)
   1293       //
   1294       // There will be an output dependence between (1)->(2) and (2)->(3).
   1295       // However, there is no dependence edge between (1)->(3). This results
   1296       // in all 3 instructions going in the same packet. We ignore dependce
   1297       // only once to avoid this situation.
   1298       auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
   1299       if (Itr != IgnoreDepMIs.end()) {
   1300         Dependence = true;
   1301         return false;
   1302       }
   1303       IgnoreDepMIs.push_back(I);
   1304       continue;
   1305     }
   1306 
   1307     // Ignore Order dependences between unconditional direct branches
   1308     // and non-control-flow instructions.
   1309     if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
   1310         DepType == SDep::Order)
   1311       continue;
   1312 
   1313     // Ignore all dependences for jumps except for true and output
   1314     // dependences.
   1315     if (I->isConditionalBranch() && DepType != SDep::Data &&
   1316         DepType != SDep::Output)
   1317       continue;
   1318 
   1319     // Ignore output dependences due to superregs. We can write to two
   1320     // different subregisters of R1:0 for instance in the same cycle.
   1321 
   1322     // If neither I nor J defines DepReg, then this is a superfluous output
   1323     // dependence. The dependence must be of the form:
   1324     //   R0 = ...
   1325     //   R1 = ...
   1326     // and there is an output dependence between the two instructions with
   1327     // DepReg = D0.
   1328     // We want to ignore these dependences. Ideally, the dependence
   1329     // constructor should annotate such dependences. We can then avoid this
   1330     // relatively expensive check.
   1331     //
   1332     if (DepType == SDep::Output) {
   1333       // DepReg is the register that's responsible for the dependence.
   1334       unsigned DepReg = SUJ->Succs[i].getReg();
   1335 
   1336       // Check if I and J really defines DepReg.
   1337       if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
   1338         continue;
   1339       FoundSequentialDependence = true;
   1340       break;
   1341     }
   1342 
   1343     // For Order dependences:
   1344     // 1. On V4 or later, volatile loads/stores can be packetized together,
   1345     //    unless other rules prevent is.
   1346     // 2. Store followed by a load is not allowed.
   1347     // 3. Store followed by a store is only valid on V4 or later.
   1348     // 4. Load followed by any memory operation is allowed.
   1349     if (DepType == SDep::Order) {
   1350       if (!PacketizeVolatiles) {
   1351         bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
   1352         if (OrdRefs) {
   1353           FoundSequentialDependence = true;
   1354           break;
   1355         }
   1356       }
   1357       // J is first, I is second.
   1358       bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
   1359       bool LoadI = I->mayLoad(), StoreI = I->mayStore();
   1360       if (StoreJ) {
   1361         // Two stores are only allowed on V4+. Load following store is never
   1362         // allowed.
   1363         if (LoadI) {
   1364           FoundSequentialDependence = true;
   1365           break;
   1366         }
   1367       } else if (!LoadJ || (!LoadI && !StoreI)) {
   1368         // If J is neither load nor store, assume a dependency.
   1369         // If J is a load, but I is neither, also assume a dependency.
   1370         FoundSequentialDependence = true;
   1371         break;
   1372       }
   1373       // Store followed by store: not OK on V2.
   1374       // Store followed by load: not OK on all.
   1375       // Load followed by store: OK on all.
   1376       // Load followed by load: OK on all.
   1377       continue;
   1378     }
   1379 
   1380     // For V4, special case ALLOCFRAME. Even though there is dependency
   1381     // between ALLOCFRAME and subsequent store, allow it to be packetized
   1382     // in a same packet. This implies that the store is using the caller's
   1383     // SP. Hence, offset needs to be updated accordingly.
   1384     if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
   1385       unsigned Opc = I->getOpcode();
   1386       switch (Opc) {
   1387         case Hexagon::S2_storerd_io:
   1388         case Hexagon::S2_storeri_io:
   1389         case Hexagon::S2_storerh_io:
   1390         case Hexagon::S2_storerb_io:
   1391           if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
   1392             int64_t Imm = I->getOperand(1).getImm();
   1393             int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
   1394             if (HII->isValidOffset(Opc, NewOff)) {
   1395               GlueAllocframeStore = true;
   1396               // Since this store is to be glued with allocframe in the same
   1397               // packet, it will use SP of the previous stack frame, i.e.
   1398               // caller's SP. Therefore, we need to recalculate offset
   1399               // according to this change.
   1400               I->getOperand(1).setImm(NewOff);
   1401               continue;
   1402             }
   1403           }
   1404         default:
   1405           break;
   1406       }
   1407     }
   1408 
   1409     // There are certain anti-dependencies that cannot be ignored.
   1410     // Specifically:
   1411     //   J2_call ... %R0<imp-def>   ; SUJ
   1412     //   R0 = ...                   ; SUI
   1413     // Those cannot be packetized together, since the call will observe
   1414     // the effect of the assignment to R0.
   1415     if (DepType == SDep::Anti && J->isCall()) {
   1416       // Check if I defines any volatile register. We should also check
   1417       // registers that the call may read, but these happen to be a
   1418       // subset of the volatile register set.
   1419       for (const MCPhysReg *P = J->getDesc().ImplicitDefs; P && *P; ++P) {
   1420         if (!I->modifiesRegister(*P, HRI))
   1421           continue;
   1422         FoundSequentialDependence = true;
   1423         break;
   1424       }
   1425     }
   1426 
   1427     // Skip over remaining anti-dependences. Two instructions that are
   1428     // anti-dependent can share a packet, since in most such cases all
   1429     // operands are read before any modifications take place.
   1430     // The exceptions are branch and call instructions, since they are
   1431     // executed after all other instructions have completed (at least
   1432     // conceptually).
   1433     if (DepType != SDep::Anti) {
   1434       FoundSequentialDependence = true;
   1435       break;
   1436     }
   1437   }
   1438 
   1439   if (FoundSequentialDependence) {
   1440     Dependence = true;
   1441     return false;
   1442   }
   1443 
   1444   return true;
   1445 }
   1446 
   1447 bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
   1448   MachineInstr *I = SUI->getInstr();
   1449   MachineInstr *J = SUJ->getInstr();
   1450   assert(I && J && "Unable to packetize null instruction!");
   1451 
   1452   if (cannotCoexist(I, J))
   1453     return false;
   1454 
   1455   if (!Dependence)
   1456     return true;
   1457 
   1458   // Check if the instruction was promoted to a dot-new. If so, demote it
   1459   // back into a dot-old.
   1460   if (PromotedToDotNew)
   1461     demoteToDotOld(I);
   1462 
   1463   cleanUpDotCur();
   1464   // Check if the instruction (must be a store) was glued with an allocframe
   1465   // instruction. If so, restore its offset to its original value, i.e. use
   1466   // current SP instead of caller's SP.
   1467   if (GlueAllocframeStore) {
   1468     unsigned FrameSize = MF.getFrameInfo()->getStackSize();
   1469     MachineOperand &MOff = I->getOperand(1);
   1470     MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
   1471   }
   1472   return false;
   1473 }
   1474 
   1475 MachineBasicBlock::iterator
   1476 HexagonPacketizerList::addToPacket(MachineInstr &MI) {
   1477   MachineBasicBlock::iterator MII = MI;
   1478   MachineBasicBlock *MBB = MI.getParent();
   1479   if (MI.isImplicitDef()) {
   1480     unsigned R = MI.getOperand(0).getReg();
   1481     if (Hexagon::IntRegsRegClass.contains(R)) {
   1482       MCSuperRegIterator S(R, HRI, false);
   1483       MI.addOperand(MachineOperand::CreateReg(*S, true, true));
   1484     }
   1485     return MII;
   1486   }
   1487   assert(ResourceTracker->canReserveResources(MI));
   1488 
   1489   bool ExtMI = HII->isExtended(&MI) || HII->isConstExtended(&MI);
   1490   bool Good = true;
   1491 
   1492   if (GlueToNewValueJump) {
   1493     MachineInstr &NvjMI = *++MII;
   1494     // We need to put both instructions in the same packet: MI and NvjMI.
   1495     // Either of them can require a constant extender. Try to add both to
   1496     // the current packet, and if that fails, end the packet and start a
   1497     // new one.
   1498     ResourceTracker->reserveResources(MI);
   1499     if (ExtMI)
   1500       Good = tryAllocateResourcesForConstExt(true);
   1501 
   1502     bool ExtNvjMI = HII->isExtended(&NvjMI) || HII->isConstExtended(&NvjMI);
   1503     if (Good) {
   1504       if (ResourceTracker->canReserveResources(NvjMI))
   1505         ResourceTracker->reserveResources(NvjMI);
   1506       else
   1507         Good = false;
   1508     }
   1509     if (Good && ExtNvjMI)
   1510       Good = tryAllocateResourcesForConstExt(true);
   1511 
   1512     if (!Good) {
   1513       endPacket(MBB, MI);
   1514       assert(ResourceTracker->canReserveResources(MI));
   1515       ResourceTracker->reserveResources(MI);
   1516       if (ExtMI) {
   1517         assert(canReserveResourcesForConstExt());
   1518         tryAllocateResourcesForConstExt(true);
   1519       }
   1520       assert(ResourceTracker->canReserveResources(NvjMI));
   1521       ResourceTracker->reserveResources(NvjMI);
   1522       if (ExtNvjMI) {
   1523         assert(canReserveResourcesForConstExt());
   1524         reserveResourcesForConstExt();
   1525       }
   1526     }
   1527     CurrentPacketMIs.push_back(&MI);
   1528     CurrentPacketMIs.push_back(&NvjMI);
   1529     return MII;
   1530   }
   1531 
   1532   ResourceTracker->reserveResources(MI);
   1533   if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
   1534     endPacket(MBB, MI);
   1535     if (PromotedToDotNew)
   1536       demoteToDotOld(&MI);
   1537     ResourceTracker->reserveResources(MI);
   1538     reserveResourcesForConstExt();
   1539   }
   1540 
   1541   CurrentPacketMIs.push_back(&MI);
   1542   return MII;
   1543 }
   1544 
   1545 void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
   1546                                       MachineBasicBlock::iterator MI) {
   1547   OldPacketMIs = CurrentPacketMIs;
   1548   VLIWPacketizerList::endPacket(MBB, MI);
   1549 }
   1550 
   1551 bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
   1552   return !producesStall(&MI);
   1553 }
   1554 
   1555 
   1556 // Return true when ConsMI uses a register defined by ProdMI.
   1557 static bool isDependent(const MachineInstr *ProdMI,
   1558       const MachineInstr *ConsMI) {
   1559   if (!ProdMI->getOperand(0).isReg())
   1560     return false;
   1561   unsigned DstReg = ProdMI->getOperand(0).getReg();
   1562 
   1563   for (auto &Op : ConsMI->operands())
   1564     if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
   1565       // The MIs depend on each other.
   1566       return true;
   1567 
   1568   return false;
   1569 }
   1570 
   1571 // V60 forward scheduling.
   1572 bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
   1573   // Check whether the previous packet is in a different loop. If this is the
   1574   // case, there is little point in trying to avoid a stall because that would
   1575   // favor the rare case (loop entry) over the common case (loop iteration).
   1576   //
   1577   // TODO: We should really be able to check all the incoming edges if this is
   1578   // the first packet in a basic block, so we can avoid stalls from the loop
   1579   // backedge.
   1580   if (!OldPacketMIs.empty()) {
   1581     auto *OldBB = OldPacketMIs.front()->getParent();
   1582     auto *ThisBB = I->getParent();
   1583     if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
   1584       return false;
   1585   }
   1586 
   1587   // Check for stall between two vector instructions.
   1588   if (HII->isV60VectorInstruction(I)) {
   1589     for (auto J : OldPacketMIs) {
   1590       if (!HII->isV60VectorInstruction(J))
   1591         continue;
   1592       if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
   1593         return true;
   1594     }
   1595     return false;
   1596   }
   1597 
   1598   // Check for stall between two scalar instructions. First, check that
   1599   // there is no definition of a use in the current packet, because it
   1600   // may be a candidate for .new.
   1601   for (auto J : CurrentPacketMIs)
   1602     if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
   1603       return false;
   1604 
   1605   // Check for stall between I and instructions in the previous packet.
   1606   if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
   1607     for (auto J : OldPacketMIs) {
   1608       if (HII->isV60VectorInstruction(J))
   1609         continue;
   1610       if (!HII->isLateInstrFeedsEarlyInstr(J, I))
   1611         continue;
   1612       if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
   1613         return true;
   1614     }
   1615   }
   1616 
   1617   return false;
   1618 }
   1619 
   1620 
   1621 //===----------------------------------------------------------------------===//
   1622 //                         Public Constructor Functions
   1623 //===----------------------------------------------------------------------===//
   1624 
   1625 FunctionPass *llvm::createHexagonPacketizer() {
   1626   return new HexagonPacketizer();
   1627 }
   1628