Home | History | Annotate | Download | only in Hexagon
      1 //===----- HexagonPacketizer.cpp - vliw packetizer ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This implements a simple VLIW packetizer using DFA. The packetizer works on
     11 // machine basic blocks. For each instruction I in BB, the packetizer consults
     12 // the DFA to see if machine resources are available to execute I. If so, the
     13 // packetizer checks if I depends on any instruction J in the current packet.
     14 // If no dependency is found, I is added to current packet and machine resource
     15 // is marked as taken. If any dependency is found, a target API call is made to
     16 // prune the dependence.
     17 //
     18 //===----------------------------------------------------------------------===//
     19 #include "HexagonRegisterInfo.h"
     20 #include "HexagonSubtarget.h"
     21 #include "HexagonTargetMachine.h"
     22 #include "HexagonVLIWPacketizer.h"
     23 #include "llvm/Analysis/AliasAnalysis.h"
     24 #include "llvm/CodeGen/MachineDominators.h"
     25 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
     26 #include "llvm/CodeGen/MachineFunctionPass.h"
     27 #include "llvm/CodeGen/MachineLoopInfo.h"
     28 #include "llvm/CodeGen/MachineRegisterInfo.h"
     29 #include "llvm/CodeGen/Passes.h"
     30 #include "llvm/Support/CommandLine.h"
     31 #include "llvm/Support/Debug.h"
     32 #include <map>
     33 #include <vector>
     34 
     35 using namespace llvm;
     36 
     37 #define DEBUG_TYPE "packets"
     38 
     39 static cl::opt<bool> DisablePacketizer("disable-packetizer", cl::Hidden,
     40   cl::ZeroOrMore, cl::init(false),
     41   cl::desc("Disable Hexagon packetizer pass"));
     42 
     43 static cl::opt<bool> PacketizeVolatiles("hexagon-packetize-volatiles",
     44   cl::ZeroOrMore, cl::Hidden, cl::init(true),
     45   cl::desc("Allow non-solo packetization of volatile memory references"));
     46 
     47 static cl::opt<bool> EnableGenAllInsnClass("enable-gen-insn", cl::init(false),
     48   cl::Hidden, cl::ZeroOrMore, cl::desc("Generate all instruction with TC"));
     49 
     50 static cl::opt<bool> DisableVecDblNVStores("disable-vecdbl-nv-stores",
     51   cl::init(false), cl::Hidden, cl::ZeroOrMore,
     52   cl::desc("Disable vector double new-value-stores"));
     53 
     54 extern cl::opt<bool> ScheduleInlineAsm;
     55 
     56 namespace llvm {
     57   FunctionPass *createHexagonPacketizer();
     58   void initializeHexagonPacketizerPass(PassRegistry&);
     59 }
     60 
     61 
     62 namespace {
     63   class HexagonPacketizer : public MachineFunctionPass {
     64   public:
     65     static char ID;
     66     HexagonPacketizer() : MachineFunctionPass(ID) {
     67       initializeHexagonPacketizerPass(*PassRegistry::getPassRegistry());
     68     }
     69 
     70     void getAnalysisUsage(AnalysisUsage &AU) const override {
     71       AU.setPreservesCFG();
     72       AU.addRequired<AAResultsWrapperPass>();
     73       AU.addRequired<MachineBranchProbabilityInfo>();
     74       AU.addRequired<MachineDominatorTree>();
     75       AU.addRequired<MachineLoopInfo>();
     76       AU.addPreserved<MachineDominatorTree>();
     77       AU.addPreserved<MachineLoopInfo>();
     78       MachineFunctionPass::getAnalysisUsage(AU);
     79     }
     80     const char *getPassName() const override {
     81       return "Hexagon Packetizer";
     82     }
     83     bool runOnMachineFunction(MachineFunction &Fn) override;
     84 
     85   private:
     86     const HexagonInstrInfo *HII;
     87     const HexagonRegisterInfo *HRI;
     88   };
     89 
     90   char HexagonPacketizer::ID = 0;
     91 }
     92 
     93 INITIALIZE_PASS_BEGIN(HexagonPacketizer, "packets", "Hexagon Packetizer",
     94                       false, false)
     95 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
     96 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
     97 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
     98 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
     99 INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
    100                     false, false)
    101 
    102 
    103 HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
    104       MachineLoopInfo &MLI, AliasAnalysis *AA,
    105       const MachineBranchProbabilityInfo *MBPI)
    106     : VLIWPacketizerList(MF, MLI, AA), MBPI(MBPI), MLI(&MLI) {
    107   HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
    108   HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    109 }
    110 
    111 // Check if FirstI modifies a register that SecondI reads.
    112 static bool hasWriteToReadDep(const MachineInstr *FirstI,
    113       const MachineInstr *SecondI, const TargetRegisterInfo *TRI) {
    114   for (auto &MO : FirstI->operands()) {
    115     if (!MO.isReg() || !MO.isDef())
    116       continue;
    117     unsigned R = MO.getReg();
    118     if (SecondI->readsRegister(R, TRI))
    119       return true;
    120   }
    121   return false;
    122 }
    123 
    124 
    125 static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
    126       MachineBasicBlock::iterator BundleIt, bool Before) {
    127   MachineBasicBlock::instr_iterator InsertPt;
    128   if (Before)
    129     InsertPt = BundleIt.getInstrIterator();
    130   else
    131     InsertPt = std::next(BundleIt).getInstrIterator();
    132 
    133   MachineBasicBlock &B = *MI->getParent();
    134   // The instruction should at least be bundled with the preceding instruction
    135   // (there will always be one, i.e. BUNDLE, if nothing else).
    136   assert(MI->isBundledWithPred());
    137   if (MI->isBundledWithSucc()) {
    138     MI->clearFlag(MachineInstr::BundledSucc);
    139     MI->clearFlag(MachineInstr::BundledPred);
    140   } else {
    141     // If it's not bundled with the successor (i.e. it is the last one
    142     // in the bundle), then we can simply unbundle it from the predecessor,
    143     // which will take care of updating the predecessor's flag.
    144     MI->unbundleFromPred();
    145   }
    146   B.splice(InsertPt, &B, MI);
    147 
    148   // Get the size of the bundle without asserting.
    149   MachineBasicBlock::const_instr_iterator I(BundleIt);
    150   MachineBasicBlock::const_instr_iterator E = B.instr_end();
    151   unsigned Size = 0;
    152   for (++I; I != E && I->isBundledWithPred(); ++I)
    153     ++Size;
    154 
    155   // If there are still two or more instructions, then there is nothing
    156   // else to be done.
    157   if (Size > 1)
    158     return BundleIt;
    159 
    160   // Otherwise, extract the single instruction out and delete the bundle.
    161   MachineBasicBlock::iterator NextIt = std::next(BundleIt);
    162   MachineInstr *SingleI = BundleIt->getNextNode();
    163   SingleI->unbundleFromPred();
    164   assert(!SingleI->isBundledWithSucc());
    165   BundleIt->eraseFromParent();
    166   return NextIt;
    167 }
    168 
    169 
    170 bool HexagonPacketizer::runOnMachineFunction(MachineFunction &MF) {
    171   if (DisablePacketizer)
    172     return false;
    173 
    174   HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
    175   HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    176   auto &MLI = getAnalysis<MachineLoopInfo>();
    177   auto *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
    178   auto *MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
    179 
    180   if (EnableGenAllInsnClass)
    181     HII->genAllInsnTimingClasses(MF);
    182 
    183   // Instantiate the packetizer.
    184   HexagonPacketizerList Packetizer(MF, MLI, AA, MBPI);
    185 
    186   // DFA state table should not be empty.
    187   assert(Packetizer.getResourceTracker() && "Empty DFA table!");
    188 
    189   //
    190   // Loop over all basic blocks and remove KILL pseudo-instructions
    191   // These instructions confuse the dependence analysis. Consider:
    192   // D0 = ...   (Insn 0)
    193   // R0 = KILL R0, D0 (Insn 1)
    194   // R0 = ... (Insn 2)
    195   // Here, Insn 1 will result in the dependence graph not emitting an output
    196   // dependence between Insn 0 and Insn 2. This can lead to incorrect
    197   // packetization
    198   //
    199   for (auto &MB : MF) {
    200     auto End = MB.end();
    201     auto MI = MB.begin();
    202     while (MI != End) {
    203       auto NextI = std::next(MI);
    204       if (MI->isKill()) {
    205         MB.erase(MI);
    206         End = MB.end();
    207       }
    208       MI = NextI;
    209     }
    210   }
    211 
    212   // Loop over all of the basic blocks.
    213   for (auto &MB : MF) {
    214     auto Begin = MB.begin(), End = MB.end();
    215     while (Begin != End) {
    216       // First the first non-boundary starting from the end of the last
    217       // scheduling region.
    218       MachineBasicBlock::iterator RB = Begin;
    219       while (RB != End && HII->isSchedulingBoundary(RB, &MB, MF))
    220         ++RB;
    221       // First the first boundary starting from the beginning of the new
    222       // region.
    223       MachineBasicBlock::iterator RE = RB;
    224       while (RE != End && !HII->isSchedulingBoundary(RE, &MB, MF))
    225         ++RE;
    226       // Add the scheduling boundary if it's not block end.
    227       if (RE != End)
    228         ++RE;
    229       // If RB == End, then RE == End.
    230       if (RB != End)
    231         Packetizer.PacketizeMIs(&MB, RB, RE);
    232 
    233       Begin = RE;
    234     }
    235   }
    236 
    237   Packetizer.unpacketizeSoloInstrs(MF);
    238   return true;
    239 }
    240 
    241 
    242 // Reserve resources for a constant extender. Trigger an assertion if the
    243 // reservation fails.
    244 void HexagonPacketizerList::reserveResourcesForConstExt() {
    245   if (!tryAllocateResourcesForConstExt(true))
    246     llvm_unreachable("Resources not available");
    247 }
    248 
    249 bool HexagonPacketizerList::canReserveResourcesForConstExt() {
    250   return tryAllocateResourcesForConstExt(false);
    251 }
    252 
    253 // Allocate resources (i.e. 4 bytes) for constant extender. If succeeded,
    254 // return true, otherwise, return false.
    255 bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
    256   auto *ExtMI = MF.CreateMachineInstr(HII->get(Hexagon::A4_ext), DebugLoc());
    257   bool Avail = ResourceTracker->canReserveResources(ExtMI);
    258   if (Reserve && Avail)
    259     ResourceTracker->reserveResources(ExtMI);
    260   MF.DeleteMachineInstr(ExtMI);
    261   return Avail;
    262 }
    263 
    264 
    265 bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
    266       SDep::Kind DepType, unsigned DepReg) {
    267   // Check for LR dependence.
    268   if (DepReg == HRI->getRARegister())
    269     return true;
    270 
    271   if (HII->isDeallocRet(MI))
    272     if (DepReg == HRI->getFrameRegister() || DepReg == HRI->getStackRegister())
    273       return true;
    274 
    275   // Check if this is a predicate dependence.
    276   const TargetRegisterClass* RC = HRI->getMinimalPhysRegClass(DepReg);
    277   if (RC == &Hexagon::PredRegsRegClass)
    278     return true;
    279 
    280   // Assumes that the first operand of the CALLr is the function address.
    281   if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
    282     MachineOperand MO = MI->getOperand(0);
    283     if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
    284       return true;
    285   }
    286 
    287   return false;
    288 }
    289 
    290 static bool isRegDependence(const SDep::Kind DepType) {
    291   return DepType == SDep::Data || DepType == SDep::Anti ||
    292          DepType == SDep::Output;
    293 }
    294 
    295 static bool isDirectJump(const MachineInstr* MI) {
    296   return MI->getOpcode() == Hexagon::J2_jump;
    297 }
    298 
    299 static bool isSchedBarrier(const MachineInstr* MI) {
    300   switch (MI->getOpcode()) {
    301   case Hexagon::Y2_barrier:
    302     return true;
    303   }
    304   return false;
    305 }
    306 
    307 static bool isControlFlow(const MachineInstr* MI) {
    308   return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
    309 }
    310 
    311 
    312 /// Returns true if the instruction modifies a callee-saved register.
    313 static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
    314                                      const TargetRegisterInfo *TRI) {
    315   const MachineFunction &MF = *MI->getParent()->getParent();
    316   for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
    317     if (MI->modifiesRegister(*CSR, TRI))
    318       return true;
    319   return false;
    320 }
    321 
    322 // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
    323 // Returns true if an instruction can be promoted to .new predicate or
    324 // new-value store.
    325 bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
    326   return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
    327 }
    328 
    329 // Promote an instructiont to its .cur form.
    330 // At this time, we have already made a call to canPromoteToDotCur and made
    331 // sure that it can *indeed* be promoted.
    332 bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
    333       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
    334       const TargetRegisterClass* RC) {
    335   assert(DepType == SDep::Data);
    336   int CurOpcode = HII->getDotCurOp(MI);
    337   MI->setDesc(HII->get(CurOpcode));
    338   return true;
    339 }
    340 
    341 void HexagonPacketizerList::cleanUpDotCur() {
    342   MachineInstr *MI = NULL;
    343   for (auto BI : CurrentPacketMIs) {
    344     DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
    345     if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
    346       MI = BI;
    347       continue;
    348     }
    349     if (MI) {
    350       for (auto &MO : BI->operands())
    351         if (MO.isReg() && MO.getReg() == MI->getOperand(0).getReg())
    352           return;
    353     }
    354   }
    355   if (!MI)
    356     return;
    357   // We did not find a use of the CUR, so de-cur it.
    358   MI->setDesc(HII->get(Hexagon::V6_vL32b_ai));
    359   DEBUG(dbgs() << "Demoted CUR "; MI->dump(););
    360 }
    361 
    362 // Check to see if an instruction can be dot cur.
    363 bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
    364       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
    365       const TargetRegisterClass *RC) {
    366   if (!HII->isV60VectorInstruction(MI))
    367     return false;
    368   if (!HII->isV60VectorInstruction(MII))
    369     return false;
    370 
    371   // Already a dot new instruction.
    372   if (HII->isDotCurInst(MI) && !HII->mayBeCurLoad(MI))
    373     return false;
    374 
    375   if (!HII->mayBeCurLoad(MI))
    376     return false;
    377 
    378   // The "cur value" cannot come from inline asm.
    379   if (PacketSU->getInstr()->isInlineAsm())
    380     return false;
    381 
    382   // Make sure candidate instruction uses cur.
    383   DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
    384         MI->dump();
    385         dbgs() << "in packet\n";);
    386   MachineInstr *MJ = MII;
    387   DEBUG(dbgs() << "Checking CUR against "; MJ->dump(););
    388   unsigned DestReg = MI->getOperand(0).getReg();
    389   bool FoundMatch = false;
    390   for (auto &MO : MJ->operands())
    391     if (MO.isReg() && MO.getReg() == DestReg)
    392       FoundMatch = true;
    393   if (!FoundMatch)
    394     return false;
    395 
    396   // Check for existing uses of a vector register within the packet which
    397   // would be affected by converting a vector load into .cur formt.
    398   for (auto BI : CurrentPacketMIs) {
    399     DEBUG(dbgs() << "packet has "; BI->dump(););
    400     if (BI->readsRegister(DepReg, MF.getSubtarget().getRegisterInfo()))
    401       return false;
    402   }
    403 
    404   DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
    405   // We can convert the opcode into a .cur.
    406   return true;
    407 }
    408 
    409 // Promote an instruction to its .new form. At this time, we have already
    410 // made a call to canPromoteToDotNew and made sure that it can *indeed* be
    411 // promoted.
    412 bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
    413       SDep::Kind DepType, MachineBasicBlock::iterator &MII,
    414       const TargetRegisterClass* RC) {
    415   assert (DepType == SDep::Data);
    416   int NewOpcode;
    417   if (RC == &Hexagon::PredRegsRegClass)
    418     NewOpcode = HII->getDotNewPredOp(MI, MBPI);
    419   else
    420     NewOpcode = HII->getDotNewOp(MI);
    421   MI->setDesc(HII->get(NewOpcode));
    422   return true;
    423 }
    424 
    425 bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
    426   int NewOpcode = HII->getDotOldOp(MI->getOpcode());
    427   MI->setDesc(HII->get(NewOpcode));
    428   return true;
    429 }
    430 
    431 enum PredicateKind {
    432   PK_False,
    433   PK_True,
    434   PK_Unknown
    435 };
    436 
    437 /// Returns true if an instruction is predicated on p0 and false if it's
    438 /// predicated on !p0.
    439 static PredicateKind getPredicateSense(const MachineInstr *MI,
    440                                        const HexagonInstrInfo *HII) {
    441   if (!HII->isPredicated(MI))
    442     return PK_Unknown;
    443   if (HII->isPredicatedTrue(MI))
    444     return PK_True;
    445   return PK_False;
    446 }
    447 
    448 static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
    449       const HexagonInstrInfo *HII) {
    450   assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
    451 #ifndef NDEBUG
    452   // Post Increment means duplicates. Use dense map to find duplicates in the
    453   // list. Caution: Densemap initializes with the minimum of 64 buckets,
    454   // whereas there are at most 5 operands in the post increment.
    455   DenseSet<unsigned> DefRegsSet;
    456   for (auto &MO : MI->operands())
    457     if (MO.isReg() && MO.isDef())
    458       DefRegsSet.insert(MO.getReg());
    459 
    460   for (auto &MO : MI->operands())
    461     if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
    462       return MO;
    463 #else
    464   if (MI->mayLoad()) {
    465     const MachineOperand &Op1 = MI->getOperand(1);
    466     // The 2nd operand is always the post increment operand in load.
    467     assert(Op1.isReg() && "Post increment operand has be to a register.");
    468     return Op1;
    469   }
    470   if (MI->getDesc().mayStore()) {
    471     const MachineOperand &Op0 = MI->getOperand(0);
    472     // The 1st operand is always the post increment operand in store.
    473     assert(Op0.isReg() && "Post increment operand has be to a register.");
    474     return Op0;
    475   }
    476 #endif
    477   // we should never come here.
    478   llvm_unreachable("mayLoad or mayStore not set for Post Increment operation");
    479 }
    480 
    481 // Get the value being stored.
    482 static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
    483   // value being stored is always the last operand.
    484   return MI->getOperand(MI->getNumOperands()-1);
    485 }
    486 
    487 static bool isLoadAbsSet(const MachineInstr *MI) {
    488   unsigned Opc = MI->getOpcode();
    489   switch (Opc) {
    490     case Hexagon::L4_loadrd_ap:
    491     case Hexagon::L4_loadrb_ap:
    492     case Hexagon::L4_loadrh_ap:
    493     case Hexagon::L4_loadrub_ap:
    494     case Hexagon::L4_loadruh_ap:
    495     case Hexagon::L4_loadri_ap:
    496       return true;
    497   }
    498   return false;
    499 }
    500 
    501 static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
    502   assert(isLoadAbsSet(MI));
    503   return MI->getOperand(1);
    504 }
    505 
    506 
    507 // Can be new value store?
    508 // Following restrictions are to be respected in convert a store into
    509 // a new value store.
    510 // 1. If an instruction uses auto-increment, its address register cannot
    511 //    be a new-value register. Arch Spec 5.4.2.1
    512 // 2. If an instruction uses absolute-set addressing mode, its address
    513 //    register cannot be a new-value register. Arch Spec 5.4.2.1.
    514 // 3. If an instruction produces a 64-bit result, its registers cannot be used
    515 //    as new-value registers. Arch Spec 5.4.2.2.
    516 // 4. If the instruction that sets the new-value register is conditional, then
    517 //    the instruction that uses the new-value register must also be conditional,
    518 //    and both must always have their predicates evaluate identically.
    519 //    Arch Spec 5.4.2.3.
    520 // 5. There is an implied restriction that a packet cannot have another store,
    521 //    if there is a new value store in the packet. Corollary: if there is
    522 //    already a store in a packet, there can not be a new value store.
    523 //    Arch Spec: 3.4.4.2
    524 bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
    525       const MachineInstr *PacketMI, unsigned DepReg) {
    526   // Make sure we are looking at the store, that can be promoted.
    527   if (!HII->mayBeNewStore(MI))
    528     return false;
    529 
    530   // Make sure there is dependency and can be new value'd.
    531   const MachineOperand &Val = getStoreValueOperand(MI);
    532   if (Val.isReg() && Val.getReg() != DepReg)
    533     return false;
    534 
    535   const MCInstrDesc& MCID = PacketMI->getDesc();
    536 
    537   // First operand is always the result.
    538   const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
    539   // Double regs can not feed into new value store: PRM section: 5.4.2.2.
    540   if (PacketRC == &Hexagon::DoubleRegsRegClass)
    541     return false;
    542 
    543   // New-value stores are of class NV (slot 0), dual stores require class ST
    544   // in slot 0 (PRM 5.5).
    545   for (auto I : CurrentPacketMIs) {
    546     SUnit *PacketSU = MIToSUnit.find(I)->second;
    547     if (PacketSU->getInstr()->mayStore())
    548       return false;
    549   }
    550 
    551   // Make sure it's NOT the post increment register that we are going to
    552   // new value.
    553   if (HII->isPostIncrement(MI) &&
    554       getPostIncrementOperand(MI, HII).getReg() == DepReg) {
    555     return false;
    556   }
    557 
    558   if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
    559       getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
    560     // If source is post_inc, or absolute-set addressing, it can not feed
    561     // into new value store
    562     //   r3 = memw(r2++#4)
    563     //   memw(r30 + #-1404) = r2.new -> can not be new value store
    564     // arch spec section: 5.4.2.1.
    565     return false;
    566   }
    567 
    568   if (isLoadAbsSet(PacketMI) && getAbsSetOperand(PacketMI).getReg() == DepReg)
    569     return false;
    570 
    571   // If the source that feeds the store is predicated, new value store must
    572   // also be predicated.
    573   if (HII->isPredicated(PacketMI)) {
    574     if (!HII->isPredicated(MI))
    575       return false;
    576 
    577     // Check to make sure that they both will have their predicates
    578     // evaluate identically.
    579     unsigned predRegNumSrc = 0;
    580     unsigned predRegNumDst = 0;
    581     const TargetRegisterClass* predRegClass = nullptr;
    582 
    583     // Get predicate register used in the source instruction.
    584     for (auto &MO : PacketMI->operands()) {
    585       if (!MO.isReg())
    586         continue;
    587       predRegNumSrc = MO.getReg();
    588       predRegClass = HRI->getMinimalPhysRegClass(predRegNumSrc);
    589       if (predRegClass == &Hexagon::PredRegsRegClass)
    590         break;
    591     }
    592     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
    593         "predicate register not found in a predicated PacketMI instruction");
    594 
    595     // Get predicate register used in new-value store instruction.
    596     for (auto &MO : MI->operands()) {
    597       if (!MO.isReg())
    598         continue;
    599       predRegNumDst = MO.getReg();
    600       predRegClass = HRI->getMinimalPhysRegClass(predRegNumDst);
    601       if (predRegClass == &Hexagon::PredRegsRegClass)
    602         break;
    603     }
    604     assert((predRegClass == &Hexagon::PredRegsRegClass) &&
    605            "predicate register not found in a predicated MI instruction");
    606 
    607     // New-value register producer and user (store) need to satisfy these
    608     // constraints:
    609     // 1) Both instructions should be predicated on the same register.
    610     // 2) If producer of the new-value register is .new predicated then store
    611     // should also be .new predicated and if producer is not .new predicated
    612     // then store should not be .new predicated.
    613     // 3) Both new-value register producer and user should have same predicate
    614     // sense, i.e, either both should be negated or both should be non-negated.
    615     if (predRegNumDst != predRegNumSrc ||
    616         HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI)  ||
    617         getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
    618       return false;
    619   }
    620 
    621   // Make sure that other than the new-value register no other store instruction
    622   // register has been modified in the same packet. Predicate registers can be
    623   // modified by they should not be modified between the producer and the store
    624   // instruction as it will make them both conditional on different values.
    625   // We already know this to be true for all the instructions before and
    626   // including PacketMI. Howerver, we need to perform the check for the
    627   // remaining instructions in the packet.
    628 
    629   unsigned StartCheck = 0;
    630 
    631   for (auto I : CurrentPacketMIs) {
    632     SUnit *TempSU = MIToSUnit.find(I)->second;
    633     MachineInstr* TempMI = TempSU->getInstr();
    634 
    635     // Following condition is true for all the instructions until PacketMI is
    636     // reached (StartCheck is set to 0 before the for loop).
    637     // StartCheck flag is 1 for all the instructions after PacketMI.
    638     if (TempMI != PacketMI && !StartCheck) // Start processing only after
    639       continue;                            // encountering PacketMI.
    640 
    641     StartCheck = 1;
    642     if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
    643       continue;
    644 
    645     for (auto &MO : MI->operands())
    646       if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
    647         return false;
    648   }
    649 
    650   // Make sure that for non-POST_INC stores:
    651   // 1. The only use of reg is DepReg and no other registers.
    652   //    This handles V4 base+index registers.
    653   //    The following store can not be dot new.
    654   //    Eg.   r0 = add(r0, #3)
    655   //          memw(r1+r0<<#2) = r0
    656   if (!HII->isPostIncrement(MI)) {
    657     for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
    658       const MachineOperand &MO = MI->getOperand(opNum);
    659       if (MO.isReg() && MO.getReg() == DepReg)
    660         return false;
    661     }
    662   }
    663 
    664   // If data definition is because of implicit definition of the register,
    665   // do not newify the store. Eg.
    666   // %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
    667   // S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
    668   for (auto &MO : PacketMI->operands()) {
    669     if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
    670       continue;
    671     unsigned R = MO.getReg();
    672     if (R == DepReg || HRI->isSuperRegister(DepReg, R))
    673       return false;
    674   }
    675 
    676   // Handle imp-use of super reg case. There is a target independent side
    677   // change that should prevent this situation but I am handling it for
    678   // just-in-case. For example, we cannot newify R2 in the following case:
    679   // %R3<def> = A2_tfrsi 0;
    680   // S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
    681   for (auto &MO : MI->operands()) {
    682     if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
    683       return false;
    684   }
    685 
    686   // Can be dot new store.
    687   return true;
    688 }
    689 
    690 // Can this MI to promoted to either new value store or new value jump.
    691 bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
    692       const SUnit *PacketSU, unsigned DepReg,
    693       MachineBasicBlock::iterator &MII) {
    694   if (!HII->mayBeNewStore(MI))
    695     return false;
    696 
    697   // Check to see the store can be new value'ed.
    698   MachineInstr *PacketMI = PacketSU->getInstr();
    699   if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
    700     return true;
    701 
    702   // Check to see the compare/jump can be new value'ed.
    703   // This is done as a pass on its own. Don't need to check it here.
    704   return false;
    705 }
    706 
    707 static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
    708   for (auto &MO : I->operands())
    709     if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
    710       return true;
    711   return false;
    712 }
    713 
    714 // Check to see if an instruction can be dot new
    715 // There are three kinds.
    716 // 1. dot new on predicate - V2/V3/V4
    717 // 2. dot new on stores NV/ST - V4
    718 // 3. dot new on jump NV/J - V4 -- This is generated in a pass.
    719 bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
    720       const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
    721       const TargetRegisterClass* RC) {
    722   // Already a dot new instruction.
    723   if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
    724     return false;
    725 
    726   if (!isNewifiable(MI))
    727     return false;
    728 
    729   const MachineInstr *PI = PacketSU->getInstr();
    730 
    731   // The "new value" cannot come from inline asm.
    732   if (PI->isInlineAsm())
    733     return false;
    734 
    735   // IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
    736   // sense.
    737   if (PI->isImplicitDef())
    738     return false;
    739 
    740   // If dependency is trough an implicitly defined register, we should not
    741   // newify the use.
    742   if (isImplicitDependency(PI, DepReg))
    743     return false;
    744 
    745   const MCInstrDesc& MCID = PI->getDesc();
    746   const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
    747   if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
    748     return false;
    749 
    750   // predicate .new
    751   // bug 5670: until that is fixed
    752   // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
    753   if (RC == &Hexagon::PredRegsRegClass)
    754     if (HII->isCondInst(MI) || MI->isReturn())
    755       return HII->predCanBeUsedAsDotNew(PI, DepReg);
    756 
    757   if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
    758     return false;
    759 
    760   // Create a dot new machine instruction to see if resources can be
    761   // allocated. If not, bail out now.
    762   int NewOpcode = HII->getDotNewOp(MI);
    763   const MCInstrDesc &D = HII->get(NewOpcode);
    764   MachineInstr *NewMI = MF.CreateMachineInstr(D, DebugLoc());
    765   bool ResourcesAvailable = ResourceTracker->canReserveResources(NewMI);
    766   MF.DeleteMachineInstr(NewMI);
    767   if (!ResourcesAvailable)
    768     return false;
    769 
    770   // New Value Store only. New Value Jump generated as a separate pass.
    771   if (!canPromoteToNewValue(MI, PacketSU, DepReg, MII))
    772     return false;
    773 
    774   return true;
    775 }
    776 
    777 // Go through the packet instructions and search for an anti dependency between
    778 // them and DepReg from MI. Consider this case:
    779 // Trying to add
    780 // a) %R1<def> = TFRI_cdNotPt %P3, 2
    781 // to this packet:
    782 // {
    783 //   b) %P0<def> = C2_or %P3<kill>, %P0<kill>
    784 //   c) %P3<def> = C2_tfrrp %R23
    785 //   d) %R1<def> = C2_cmovenewit %P3, 4
    786 //  }
    787 // The P3 from a) and d) will be complements after
    788 // a)'s P3 is converted to .new form
    789 // Anti-dep between c) and b) is irrelevant for this case
    790 bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
    791                                                         unsigned DepReg) {
    792   SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
    793 
    794   for (auto I : CurrentPacketMIs) {
    795     // We only care for dependencies to predicated instructions
    796     if (!HII->isPredicated(I))
    797       continue;
    798 
    799     // Scheduling Unit for current insn in the packet
    800     SUnit *PacketSU = MIToSUnit.find(I)->second;
    801 
    802     // Look at dependencies between current members of the packet and
    803     // predicate defining instruction MI. Make sure that dependency is
    804     // on the exact register we care about.
    805     if (PacketSU->isSucc(PacketSUDep)) {
    806       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
    807         auto &Dep = PacketSU->Succs[i];
    808         if (Dep.getSUnit() == PacketSUDep && Dep.getKind() == SDep::Anti &&
    809             Dep.getReg() == DepReg)
    810           return true;
    811       }
    812     }
    813   }
    814 
    815   return false;
    816 }
    817 
    818 
    819 /// Gets the predicate register of a predicated instruction.
    820 static unsigned getPredicatedRegister(MachineInstr *MI,
    821                                       const HexagonInstrInfo *QII) {
    822   /// We use the following rule: The first predicate register that is a use is
    823   /// the predicate register of a predicated instruction.
    824   assert(QII->isPredicated(MI) && "Must be predicated instruction");
    825 
    826   for (auto &Op : MI->operands()) {
    827     if (Op.isReg() && Op.getReg() && Op.isUse() &&
    828         Hexagon::PredRegsRegClass.contains(Op.getReg()))
    829       return Op.getReg();
    830   }
    831 
    832   llvm_unreachable("Unknown instruction operand layout");
    833   return 0;
    834 }
    835 
    836 // Given two predicated instructions, this function detects whether
    837 // the predicates are complements.
    838 bool HexagonPacketizerList::arePredicatesComplements(MachineInstr *MI1,
    839                                                      MachineInstr *MI2) {
    840   // If we don't know the predicate sense of the instructions bail out early, we
    841   // need it later.
    842   if (getPredicateSense(MI1, HII) == PK_Unknown ||
    843       getPredicateSense(MI2, HII) == PK_Unknown)
    844     return false;
    845 
    846   // Scheduling unit for candidate.
    847   SUnit *SU = MIToSUnit[MI1];
    848 
    849   // One corner case deals with the following scenario:
    850   // Trying to add
    851   // a) %R24<def> = A2_tfrt %P0, %R25
    852   // to this packet:
    853   // {
    854   //   b) %R25<def> = A2_tfrf %P0, %R24
    855   //   c) %P0<def> = C2_cmpeqi %R26, 1
    856   // }
    857   //
    858   // On general check a) and b) are complements, but presence of c) will
    859   // convert a) to .new form, and then it is not a complement.
    860   // We attempt to detect it by analyzing existing dependencies in the packet.
    861 
    862   // Analyze relationships between all existing members of the packet.
    863   // Look for Anti dependecy on the same predicate reg as used in the
    864   // candidate.
    865   for (auto I : CurrentPacketMIs) {
    866     // Scheduling Unit for current insn in the packet.
    867     SUnit *PacketSU = MIToSUnit.find(I)->second;
    868 
    869     // If this instruction in the packet is succeeded by the candidate...
    870     if (PacketSU->isSucc(SU)) {
    871       for (unsigned i = 0; i < PacketSU->Succs.size(); ++i) {
    872         auto Dep = PacketSU->Succs[i];
    873         // The corner case exist when there is true data dependency between
    874         // candidate and one of current packet members, this dep is on
    875         // predicate reg, and there already exist anti dep on the same pred in
    876         // the packet.
    877         if (Dep.getSUnit() == SU && Dep.getKind() == SDep::Data &&
    878             Hexagon::PredRegsRegClass.contains(Dep.getReg())) {
    879           // Here I know that I is predicate setting instruction with true
    880           // data dep to candidate on the register we care about - c) in the
    881           // above example. Now I need to see if there is an anti dependency
    882           // from c) to any other instruction in the same packet on the pred
    883           // reg of interest.
    884           if (restrictingDepExistInPacket(I, Dep.getReg()))
    885             return false;
    886         }
    887       }
    888     }
    889   }
    890 
    891   // If the above case does not apply, check regular complement condition.
    892   // Check that the predicate register is the same and that the predicate
    893   // sense is different We also need to differentiate .old vs. .new: !p0
    894   // is not complementary to p0.new.
    895   unsigned PReg1 = getPredicatedRegister(MI1, HII);
    896   unsigned PReg2 = getPredicatedRegister(MI2, HII);
    897   return PReg1 == PReg2 &&
    898          Hexagon::PredRegsRegClass.contains(PReg1) &&
    899          Hexagon::PredRegsRegClass.contains(PReg2) &&
    900          getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
    901          HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
    902 }
    903 
    904 // Initialize packetizer flags.
    905 void HexagonPacketizerList::initPacketizerState() {
    906   Dependence = false;
    907   PromotedToDotNew = false;
    908   GlueToNewValueJump = false;
    909   GlueAllocframeStore = false;
    910   FoundSequentialDependence = false;
    911 }
    912 
    913 // Ignore bundling of pseudo instructions.
    914 bool HexagonPacketizerList::ignorePseudoInstruction(const MachineInstr *MI,
    915       const MachineBasicBlock*) {
    916   if (MI->isDebugValue())
    917     return true;
    918 
    919   if (MI->isCFIInstruction())
    920     return false;
    921 
    922   // We must print out inline assembly.
    923   if (MI->isInlineAsm())
    924     return false;
    925 
    926   if (MI->isImplicitDef())
    927     return false;
    928 
    929   // We check if MI has any functional units mapped to it. If it doesn't,
    930   // we ignore the instruction.
    931   const MCInstrDesc& TID = MI->getDesc();
    932   auto *IS = ResourceTracker->getInstrItins()->beginStage(TID.getSchedClass());
    933   unsigned FuncUnits = IS->getUnits();
    934   return !FuncUnits;
    935 }
    936 
    937 bool HexagonPacketizerList::isSoloInstruction(const MachineInstr *MI) {
    938   if (MI->isEHLabel() || MI->isCFIInstruction())
    939     return true;
    940 
    941   // Consider inline asm to not be a solo instruction by default.
    942   // Inline asm will be put in a packet temporarily, but then it will be
    943   // removed, and placed outside of the packet (before or after, depending
    944   // on dependencies).  This is to reduce the impact of inline asm as a
    945   // "packet splitting" instruction.
    946   if (MI->isInlineAsm() && !ScheduleInlineAsm)
    947     return true;
    948 
    949   // From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
    950   // trap, pause, barrier, icinva, isync, and syncht are solo instructions.
    951   // They must not be grouped with other instructions in a packet.
    952   if (isSchedBarrier(MI))
    953     return true;
    954 
    955   if (HII->isSolo(MI))
    956     return true;
    957 
    958   if (MI->getOpcode() == Hexagon::A2_nop)
    959     return true;
    960 
    961   return false;
    962 }
    963 
    964 
    965 // Quick check if instructions MI and MJ cannot coexist in the same packet.
    966 // Limit the tests to be "one-way", e.g.  "if MI->isBranch and MJ->isInlineAsm",
    967 // but not the symmetric case: "if MJ->isBranch and MI->isInlineAsm".
    968 // For full test call this function twice:
    969 //   cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
    970 // Doing the test only one way saves the amount of code in this function,
    971 // since every test would need to be repeated with the MI and MJ reversed.
    972 static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
    973       const HexagonInstrInfo &HII) {
    974   const MachineFunction *MF = MI->getParent()->getParent();
    975   if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
    976       HII.isHVXMemWithAIndirect(MI, MJ))
    977     return true;
    978 
    979   // An inline asm cannot be together with a branch, because we may not be
    980   // able to remove the asm out after packetizing (i.e. if the asm must be
    981   // moved past the bundle).  Similarly, two asms cannot be together to avoid
    982   // complications when determining their relative order outside of a bundle.
    983   if (MI->isInlineAsm())
    984     return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
    985            MJ->isCall() || MJ->isTerminator();
    986 
    987   // "False" really means that the quick check failed to determine if
    988   // I and J cannot coexist.
    989   return false;
    990 }
    991 
    992 
    993 // Full, symmetric check.
    994 bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
    995       const MachineInstr *MJ) {
    996   return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
    997 }
    998 
    999 void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
   1000   for (auto &B : MF) {
   1001     MachineBasicBlock::iterator BundleIt;
   1002     MachineBasicBlock::instr_iterator NextI;
   1003     for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
   1004       NextI = std::next(I);
   1005       MachineInstr *MI = &*I;
   1006       if (MI->isBundle())
   1007         BundleIt = I;
   1008       if (!MI->isInsideBundle())
   1009         continue;
   1010 
   1011       // Decide on where to insert the instruction that we are pulling out.
   1012       // Debug instructions always go before the bundle, but the placement of
   1013       // INLINE_ASM depends on potential dependencies.  By default, try to
   1014       // put it before the bundle, but if the asm writes to a register that
   1015       // other instructions in the bundle read, then we need to place it
   1016       // after the bundle (to preserve the bundle semantics).
   1017       bool InsertBeforeBundle;
   1018       if (MI->isInlineAsm())
   1019         InsertBeforeBundle = !hasWriteToReadDep(MI, BundleIt, HRI);
   1020       else if (MI->isDebugValue())
   1021         InsertBeforeBundle = true;
   1022       else
   1023         continue;
   1024 
   1025       BundleIt = moveInstrOut(MI, BundleIt, InsertBeforeBundle);
   1026     }
   1027   }
   1028 }
   1029 
   1030 // Check if a given instruction is of class "system".
   1031 static bool isSystemInstr(const MachineInstr *MI) {
   1032   unsigned Opc = MI->getOpcode();
   1033   switch (Opc) {
   1034     case Hexagon::Y2_barrier:
   1035     case Hexagon::Y2_dcfetchbo:
   1036       return true;
   1037   }
   1038   return false;
   1039 }
   1040 
   1041 bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
   1042                                               const MachineInstr *J) {
   1043   // The dependence graph may not include edges between dead definitions,
   1044   // so without extra checks, we could end up packetizing two instruction
   1045   // defining the same (dead) register.
   1046   if (I->isCall() || J->isCall())
   1047     return false;
   1048   if (HII->isPredicated(I) || HII->isPredicated(J))
   1049     return false;
   1050 
   1051   BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
   1052   for (auto &MO : I->operands()) {
   1053     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
   1054       continue;
   1055     DeadDefs[MO.getReg()] = true;
   1056   }
   1057 
   1058   for (auto &MO : J->operands()) {
   1059     if (!MO.isReg() || !MO.isDef() || !MO.isDead())
   1060       continue;
   1061     unsigned R = MO.getReg();
   1062     if (R != Hexagon::USR_OVF && DeadDefs[R])
   1063       return true;
   1064   }
   1065   return false;
   1066 }
   1067 
   1068 bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
   1069                                                  const MachineInstr *J) {
   1070   // A save callee-save register function call can only be in a packet
   1071   // with instructions that don't write to the callee-save registers.
   1072   if ((HII->isSaveCalleeSavedRegsCall(I) &&
   1073        doesModifyCalleeSavedReg(J, HRI)) ||
   1074       (HII->isSaveCalleeSavedRegsCall(J) &&
   1075        doesModifyCalleeSavedReg(I, HRI)))
   1076     return true;
   1077 
   1078   // Two control flow instructions cannot go in the same packet.
   1079   if (isControlFlow(I) && isControlFlow(J))
   1080     return true;
   1081 
   1082   // \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
   1083   // contain a speculative indirect jump,
   1084   // a new-value compare jump or a dealloc_return.
   1085   auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
   1086     if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
   1087       return true;
   1088     if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
   1089       return true;
   1090     return false;
   1091   };
   1092 
   1093   if (HII->isLoopN(I) && isBadForLoopN(J))
   1094     return true;
   1095   if (HII->isLoopN(J) && isBadForLoopN(I))
   1096     return true;
   1097 
   1098   // dealloc_return cannot appear in the same packet as a conditional or
   1099   // unconditional jump.
   1100   return HII->isDeallocRet(I) &&
   1101          (J->isBranch() || J->isCall() || J->isBarrier());
   1102 }
   1103 
   1104 bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
   1105                                                     const MachineInstr *J) {
   1106   bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
   1107   bool StoreI = I->mayStore(), StoreJ = J->mayStore();
   1108   if ((SysI && StoreJ) || (SysJ && StoreI))
   1109     return true;
   1110 
   1111   if (StoreI && StoreJ) {
   1112     if (HII->isNewValueInst(J) || HII->isMemOp(J) || HII->isMemOp(I))
   1113       return true;
   1114   } else {
   1115     // A memop cannot be in the same packet with another memop or a store.
   1116     // Two stores can be together, but here I and J cannot both be stores.
   1117     bool MopStI = HII->isMemOp(I) || StoreI;
   1118     bool MopStJ = HII->isMemOp(J) || StoreJ;
   1119     if (MopStI && MopStJ)
   1120       return true;
   1121   }
   1122 
   1123   return (StoreJ && HII->isDeallocRet(I)) || (StoreI && HII->isDeallocRet(J));
   1124 }
   1125 
   1126 // SUI is the current instruction that is out side of the current packet.
   1127 // SUJ is the current instruction inside the current packet against which that
   1128 // SUI will be packetized.
   1129 bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
   1130   MachineInstr *I = SUI->getInstr();
   1131   MachineInstr *J = SUJ->getInstr();
   1132   assert(I && J && "Unable to packetize null instruction!");
   1133 
   1134   // Clear IgnoreDepMIs when Packet starts.
   1135   if (CurrentPacketMIs.size() == 1)
   1136     IgnoreDepMIs.clear();
   1137 
   1138   MachineBasicBlock::iterator II = I;
   1139   const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
   1140 
   1141   // Solo instructions cannot go in the packet.
   1142   assert(!isSoloInstruction(I) && "Unexpected solo instr!");
   1143 
   1144   if (cannotCoexist(I, J))
   1145     return false;
   1146 
   1147   Dependence = hasDeadDependence(I, J) || hasControlDependence(I, J);
   1148   if (Dependence)
   1149     return false;
   1150 
   1151   // V4 allows dual stores. It does not allow second store, if the first
   1152   // store is not in SLOT0. New value store, new value jump, dealloc_return
   1153   // and memop always take SLOT0. Arch spec 3.4.4.2.
   1154   Dependence = hasV4SpecificDependence(I, J);
   1155   if (Dependence)
   1156     return false;
   1157 
   1158   // If an instruction feeds new value jump, glue it.
   1159   MachineBasicBlock::iterator NextMII = I;
   1160   ++NextMII;
   1161   if (NextMII != I->getParent()->end() && HII->isNewValueJump(NextMII)) {
   1162     MachineInstr *NextMI = NextMII;
   1163 
   1164     bool secondRegMatch = false;
   1165     const MachineOperand &NOp0 = NextMI->getOperand(0);
   1166     const MachineOperand &NOp1 = NextMI->getOperand(1);
   1167 
   1168     if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
   1169       secondRegMatch = true;
   1170 
   1171     for (auto I : CurrentPacketMIs) {
   1172       SUnit *PacketSU = MIToSUnit.find(I)->second;
   1173       MachineInstr *PI = PacketSU->getInstr();
   1174       // NVJ can not be part of the dual jump - Arch Spec: section 7.8.
   1175       if (PI->isCall()) {
   1176         Dependence = true;
   1177         break;
   1178       }
   1179       // Validate:
   1180       // 1. Packet does not have a store in it.
   1181       // 2. If the first operand of the nvj is newified, and the second
   1182       //    operand is also a reg, it (second reg) is not defined in
   1183       //    the same packet.
   1184       // 3. If the second operand of the nvj is newified, (which means
   1185       //    first operand is also a reg), first reg is not defined in
   1186       //    the same packet.
   1187       if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
   1188           HII->isLoopN(PI)) {
   1189         Dependence = true;
   1190         break;
   1191       }
   1192       // Check #2/#3.
   1193       const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
   1194       if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
   1195         Dependence = true;
   1196         break;
   1197       }
   1198     }
   1199 
   1200     if (Dependence)
   1201       return false;
   1202     GlueToNewValueJump = true;
   1203   }
   1204 
   1205   // There no dependency between a prolog instruction and its successor.
   1206   if (!SUJ->isSucc(SUI))
   1207     return true;
   1208 
   1209   for (unsigned i = 0; i < SUJ->Succs.size(); ++i) {
   1210     if (FoundSequentialDependence)
   1211       break;
   1212 
   1213     if (SUJ->Succs[i].getSUnit() != SUI)
   1214       continue;
   1215 
   1216     SDep::Kind DepType = SUJ->Succs[i].getKind();
   1217     // For direct calls:
   1218     // Ignore register dependences for call instructions for packetization
   1219     // purposes except for those due to r31 and predicate registers.
   1220     //
   1221     // For indirect calls:
   1222     // Same as direct calls + check for true dependences to the register
   1223     // used in the indirect call.
   1224     //
   1225     // We completely ignore Order dependences for call instructions.
   1226     //
   1227     // For returns:
   1228     // Ignore register dependences for return instructions like jumpr,
   1229     // dealloc return unless we have dependencies on the explicit uses
   1230     // of the registers used by jumpr (like r31) or dealloc return
   1231     // (like r29 or r30).
   1232     //
   1233     // TODO: Currently, jumpr is handling only return of r31. So, the
   1234     // following logic (specificaly isCallDependent) is working fine.
   1235     // We need to enable jumpr for register other than r31 and then,
   1236     // we need to rework the last part, where it handles indirect call
   1237     // of that (isCallDependent) function. Bug 6216 is opened for this.
   1238     unsigned DepReg = 0;
   1239     const TargetRegisterClass *RC = nullptr;
   1240     if (DepType == SDep::Data) {
   1241       DepReg = SUJ->Succs[i].getReg();
   1242       RC = HRI->getMinimalPhysRegClass(DepReg);
   1243     }
   1244 
   1245     if (I->isCall() || I->isReturn()) {
   1246       if (!isRegDependence(DepType))
   1247         continue;
   1248       if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
   1249         continue;
   1250     }
   1251 
   1252     if (DepType == SDep::Data) {
   1253       if (canPromoteToDotCur(J, SUJ, DepReg, II, RC))
   1254         if (promoteToDotCur(J, DepType, II, RC))
   1255           continue;
   1256     }
   1257 
   1258     // Data dpendence ok if we have load.cur.
   1259     if (DepType == SDep::Data && HII->isDotCurInst(J)) {
   1260       if (HII->isV60VectorInstruction(I))
   1261         continue;
   1262     }
   1263 
   1264     // For instructions that can be promoted to dot-new, try to promote.
   1265     if (DepType == SDep::Data) {
   1266       if (canPromoteToDotNew(I, SUJ, DepReg, II, RC)) {
   1267         if (promoteToDotNew(I, DepType, II, RC)) {
   1268           PromotedToDotNew = true;
   1269           continue;
   1270         }
   1271       }
   1272       if (HII->isNewValueJump(I))
   1273         continue;
   1274     }
   1275 
   1276     // For predicated instructions, if the predicates are complements then
   1277     // there can be no dependence.
   1278     if (HII->isPredicated(I) && HII->isPredicated(J) &&
   1279         arePredicatesComplements(I, J)) {
   1280       // Not always safe to do this translation.
   1281       // DAG Builder attempts to reduce dependence edges using transitive
   1282       // nature of dependencies. Here is an example:
   1283       //
   1284       // r0 = tfr_pt ... (1)
   1285       // r0 = tfr_pf ... (2)
   1286       // r0 = tfr_pt ... (3)
   1287       //
   1288       // There will be an output dependence between (1)->(2) and (2)->(3).
   1289       // However, there is no dependence edge between (1)->(3). This results
   1290       // in all 3 instructions going in the same packet. We ignore dependce
   1291       // only once to avoid this situation.
   1292       auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
   1293       if (Itr != IgnoreDepMIs.end()) {
   1294         Dependence = true;
   1295         return false;
   1296       }
   1297       IgnoreDepMIs.push_back(I);
   1298       continue;
   1299     }
   1300 
   1301     // Ignore Order dependences between unconditional direct branches
   1302     // and non-control-flow instructions.
   1303     if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
   1304         DepType == SDep::Order)
   1305       continue;
   1306 
   1307     // Ignore all dependences for jumps except for true and output
   1308     // dependences.
   1309     if (I->isConditionalBranch() && DepType != SDep::Data &&
   1310         DepType != SDep::Output)
   1311       continue;
   1312 
   1313     // Ignore output dependences due to superregs. We can write to two
   1314     // different subregisters of R1:0 for instance in the same cycle.
   1315 
   1316     // If neither I nor J defines DepReg, then this is a superfluous output
   1317     // dependence. The dependence must be of the form:
   1318     //   R0 = ...
   1319     //   R1 = ...
   1320     // and there is an output dependence between the two instructions with
   1321     // DepReg = D0.
   1322     // We want to ignore these dependences. Ideally, the dependence
   1323     // constructor should annotate such dependences. We can then avoid this
   1324     // relatively expensive check.
   1325     //
   1326     if (DepType == SDep::Output) {
   1327       // DepReg is the register that's responsible for the dependence.
   1328       unsigned DepReg = SUJ->Succs[i].getReg();
   1329 
   1330       // Check if I and J really defines DepReg.
   1331       if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
   1332         continue;
   1333       FoundSequentialDependence = true;
   1334       break;
   1335     }
   1336 
   1337     // For Order dependences:
   1338     // 1. On V4 or later, volatile loads/stores can be packetized together,
   1339     //    unless other rules prevent is.
   1340     // 2. Store followed by a load is not allowed.
   1341     // 3. Store followed by a store is only valid on V4 or later.
   1342     // 4. Load followed by any memory operation is allowed.
   1343     if (DepType == SDep::Order) {
   1344       if (!PacketizeVolatiles) {
   1345         bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
   1346         if (OrdRefs) {
   1347           FoundSequentialDependence = true;
   1348           break;
   1349         }
   1350       }
   1351       // J is first, I is second.
   1352       bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
   1353       bool LoadI = I->mayLoad(), StoreI = I->mayStore();
   1354       if (StoreJ) {
   1355         // Two stores are only allowed on V4+. Load following store is never
   1356         // allowed.
   1357         if (LoadI) {
   1358           FoundSequentialDependence = true;
   1359           break;
   1360         }
   1361       } else if (!LoadJ || (!LoadI && !StoreI)) {
   1362         // If J is neither load nor store, assume a dependency.
   1363         // If J is a load, but I is neither, also assume a dependency.
   1364         FoundSequentialDependence = true;
   1365         break;
   1366       }
   1367       // Store followed by store: not OK on V2.
   1368       // Store followed by load: not OK on all.
   1369       // Load followed by store: OK on all.
   1370       // Load followed by load: OK on all.
   1371       continue;
   1372     }
   1373 
   1374     // For V4, special case ALLOCFRAME. Even though there is dependency
   1375     // between ALLOCFRAME and subsequent store, allow it to be packetized
   1376     // in a same packet. This implies that the store is using the caller's
   1377     // SP. Hence, offset needs to be updated accordingly.
   1378     if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
   1379       unsigned Opc = I->getOpcode();
   1380       switch (Opc) {
   1381         case Hexagon::S2_storerd_io:
   1382         case Hexagon::S2_storeri_io:
   1383         case Hexagon::S2_storerh_io:
   1384         case Hexagon::S2_storerb_io:
   1385           if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
   1386             int64_t Imm = I->getOperand(1).getImm();
   1387             int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
   1388             if (HII->isValidOffset(Opc, NewOff)) {
   1389               GlueAllocframeStore = true;
   1390               // Since this store is to be glued with allocframe in the same
   1391               // packet, it will use SP of the previous stack frame, i.e.
   1392               // caller's SP. Therefore, we need to recalculate offset
   1393               // according to this change.
   1394               I->getOperand(1).setImm(NewOff);
   1395               continue;
   1396             }
   1397           }
   1398         default:
   1399           break;
   1400       }
   1401     }
   1402 
   1403     // Skip over anti-dependences. Two instructions that are anti-dependent
   1404     // can share a packet.
   1405     if (DepType != SDep::Anti) {
   1406       FoundSequentialDependence = true;
   1407       break;
   1408     }
   1409   }
   1410 
   1411   if (FoundSequentialDependence) {
   1412     Dependence = true;
   1413     return false;
   1414   }
   1415 
   1416   return true;
   1417 }
   1418 
   1419 bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
   1420   MachineInstr *I = SUI->getInstr();
   1421   MachineInstr *J = SUJ->getInstr();
   1422   assert(I && J && "Unable to packetize null instruction!");
   1423 
   1424   if (cannotCoexist(I, J))
   1425     return false;
   1426 
   1427   if (!Dependence)
   1428     return true;
   1429 
   1430   // Check if the instruction was promoted to a dot-new. If so, demote it
   1431   // back into a dot-old.
   1432   if (PromotedToDotNew)
   1433     demoteToDotOld(I);
   1434 
   1435   cleanUpDotCur();
   1436   // Check if the instruction (must be a store) was glued with an allocframe
   1437   // instruction. If so, restore its offset to its original value, i.e. use
   1438   // current SP instead of caller's SP.
   1439   if (GlueAllocframeStore) {
   1440     unsigned FrameSize = MF.getFrameInfo()->getStackSize();
   1441     MachineOperand &MOff = I->getOperand(1);
   1442     MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
   1443   }
   1444   return false;
   1445 }
   1446 
   1447 
   1448 MachineBasicBlock::iterator
   1449 HexagonPacketizerList::addToPacket(MachineInstr *MI) {
   1450   MachineBasicBlock::iterator MII = MI;
   1451   MachineBasicBlock *MBB = MI->getParent();
   1452   if (MI->isImplicitDef()) {
   1453     unsigned R = MI->getOperand(0).getReg();
   1454     if (Hexagon::IntRegsRegClass.contains(R)) {
   1455       MCSuperRegIterator S(R, HRI, false);
   1456       MI->addOperand(MachineOperand::CreateReg(*S, true, true));
   1457     }
   1458     return MII;
   1459   }
   1460   assert(ResourceTracker->canReserveResources(MI));
   1461 
   1462   bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
   1463   bool Good = true;
   1464 
   1465   if (GlueToNewValueJump) {
   1466     MachineInstr *NvjMI = ++MII;
   1467     // We need to put both instructions in the same packet: MI and NvjMI.
   1468     // Either of them can require a constant extender. Try to add both to
   1469     // the current packet, and if that fails, end the packet and start a
   1470     // new one.
   1471     ResourceTracker->reserveResources(MI);
   1472     if (ExtMI)
   1473       Good = tryAllocateResourcesForConstExt(true);
   1474 
   1475     bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
   1476     if (Good) {
   1477       if (ResourceTracker->canReserveResources(NvjMI))
   1478         ResourceTracker->reserveResources(NvjMI);
   1479       else
   1480         Good = false;
   1481     }
   1482     if (Good && ExtNvjMI)
   1483       Good = tryAllocateResourcesForConstExt(true);
   1484 
   1485     if (!Good) {
   1486       endPacket(MBB, MI);
   1487       assert(ResourceTracker->canReserveResources(MI));
   1488       ResourceTracker->reserveResources(MI);
   1489       if (ExtMI) {
   1490         assert(canReserveResourcesForConstExt());
   1491         tryAllocateResourcesForConstExt(true);
   1492       }
   1493       assert(ResourceTracker->canReserveResources(NvjMI));
   1494       ResourceTracker->reserveResources(NvjMI);
   1495       if (ExtNvjMI) {
   1496         assert(canReserveResourcesForConstExt());
   1497         reserveResourcesForConstExt();
   1498       }
   1499     }
   1500     CurrentPacketMIs.push_back(MI);
   1501     CurrentPacketMIs.push_back(NvjMI);
   1502     return MII;
   1503   }
   1504 
   1505   ResourceTracker->reserveResources(MI);
   1506   if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
   1507     endPacket(MBB, MI);
   1508     if (PromotedToDotNew)
   1509       demoteToDotOld(MI);
   1510     ResourceTracker->reserveResources(MI);
   1511     reserveResourcesForConstExt();
   1512   }
   1513 
   1514   CurrentPacketMIs.push_back(MI);
   1515   return MII;
   1516 }
   1517 
   1518 void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
   1519       MachineInstr *MI) {
   1520   OldPacketMIs = CurrentPacketMIs;
   1521   VLIWPacketizerList::endPacket(MBB, MI);
   1522 }
   1523 
   1524 bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr *MI) {
   1525   return !producesStall(MI);
   1526 }
   1527 
   1528 
   1529 // Return true when ConsMI uses a register defined by ProdMI.
   1530 static bool isDependent(const MachineInstr *ProdMI,
   1531       const MachineInstr *ConsMI) {
   1532   if (!ProdMI->getOperand(0).isReg())
   1533     return false;
   1534   unsigned DstReg = ProdMI->getOperand(0).getReg();
   1535 
   1536   for (auto &Op : ConsMI->operands())
   1537     if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
   1538       // The MIs depend on each other.
   1539       return true;
   1540 
   1541   return false;
   1542 }
   1543 
   1544 // V60 forward scheduling.
   1545 bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
   1546   // Check whether the previous packet is in a different loop. If this is the
   1547   // case, there is little point in trying to avoid a stall because that would
   1548   // favor the rare case (loop entry) over the common case (loop iteration).
   1549   //
   1550   // TODO: We should really be able to check all the incoming edges if this is
   1551   // the first packet in a basic block, so we can avoid stalls from the loop
   1552   // backedge.
   1553   if (!OldPacketMIs.empty()) {
   1554     auto *OldBB = OldPacketMIs.front()->getParent();
   1555     auto *ThisBB = I->getParent();
   1556     if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
   1557       return false;
   1558   }
   1559 
   1560   // Check for stall between two vector instructions.
   1561   if (HII->isV60VectorInstruction(I)) {
   1562     for (auto J : OldPacketMIs) {
   1563       if (!HII->isV60VectorInstruction(J))
   1564         continue;
   1565       if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
   1566         return true;
   1567     }
   1568     return false;
   1569   }
   1570 
   1571   // Check for stall between two scalar instructions. First, check that
   1572   // there is no definition of a use in the current packet, because it
   1573   // may be a candidate for .new.
   1574   for (auto J : CurrentPacketMIs)
   1575     if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
   1576       return false;
   1577 
   1578   // Check for stall between I and instructions in the previous packet.
   1579   if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
   1580     for (auto J : OldPacketMIs) {
   1581       if (HII->isV60VectorInstruction(J))
   1582         continue;
   1583       if (!HII->isLateInstrFeedsEarlyInstr(J, I))
   1584         continue;
   1585       if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
   1586         return true;
   1587     }
   1588   }
   1589 
   1590   return false;
   1591 }
   1592 
   1593 
   1594 //===----------------------------------------------------------------------===//
   1595 //                         Public Constructor Functions
   1596 //===----------------------------------------------------------------------===//
   1597 
   1598 FunctionPass *llvm::createHexagonPacketizer() {
   1599   return new HexagonPacketizer();
   1600 }
   1601 
   1602