Home | History | Annotate | Download | only in Hexagon
      1 //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass identifies loops where we can generate the Hexagon hardware
     11 // loop instruction.  The hardware loop can perform loop branches with a
     12 // zero-cycle overhead.
     13 //
     14 // The pattern that defines the induction variable can changed depending on
     15 // prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
     16 // normalizes induction variables, and the Loop Strength Reduction pass
     17 // run by 'llc' may also make changes to the induction variable.
     18 // The pattern detected by this phase is due to running Strength Reduction.
     19 //
     20 // Criteria for hardware loops:
     21 //  - Countable loops (w/ ind. var for a trip count)
     22 //  - Assumes loops are normalized by IndVarSimplify
     23 //  - Try inner-most loops first
     24 //  - No nested hardware loops.
     25 //  - No function calls in loops.
     26 //
     27 //===----------------------------------------------------------------------===//
     28 
     29 #include "llvm/ADT/SmallSet.h"
     30 #include "Hexagon.h"
     31 #include "HexagonTargetMachine.h"
     32 #include "llvm/ADT/Statistic.h"
     33 #include "llvm/CodeGen/MachineDominators.h"
     34 #include "llvm/CodeGen/MachineFunction.h"
     35 #include "llvm/CodeGen/MachineFunctionPass.h"
     36 #include "llvm/CodeGen/MachineInstrBuilder.h"
     37 #include "llvm/CodeGen/MachineLoopInfo.h"
     38 #include "llvm/CodeGen/MachineRegisterInfo.h"
     39 #include "llvm/PassSupport.h"
     40 #include "llvm/Support/CommandLine.h"
     41 #include "llvm/Support/Debug.h"
     42 #include "llvm/Support/raw_ostream.h"
     43 #include "llvm/Target/TargetInstrInfo.h"
     44 #include <algorithm>
     45 #include <vector>
     46 
     47 using namespace llvm;
     48 
     49 #define DEBUG_TYPE "hwloops"
     50 
     51 #ifndef NDEBUG
     52 static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
     53 #endif
     54 
     55 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
     56 
     57 namespace llvm {
     58   void initializeHexagonHardwareLoopsPass(PassRegistry&);
     59 }
     60 
     61 namespace {
     62   class CountValue;
     63   struct HexagonHardwareLoops : public MachineFunctionPass {
     64     MachineLoopInfo            *MLI;
     65     MachineRegisterInfo        *MRI;
     66     MachineDominatorTree       *MDT;
     67     const HexagonTargetMachine *TM;
     68     const HexagonInstrInfo     *TII;
     69     const HexagonRegisterInfo  *TRI;
     70 #ifndef NDEBUG
     71     static int Counter;
     72 #endif
     73 
     74   public:
     75     static char ID;
     76 
     77     HexagonHardwareLoops() : MachineFunctionPass(ID) {
     78       initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
     79     }
     80 
     81     bool runOnMachineFunction(MachineFunction &MF) override;
     82 
     83     const char *getPassName() const override { return "Hexagon Hardware Loops"; }
     84 
     85     void getAnalysisUsage(AnalysisUsage &AU) const override {
     86       AU.addRequired<MachineDominatorTree>();
     87       AU.addRequired<MachineLoopInfo>();
     88       MachineFunctionPass::getAnalysisUsage(AU);
     89     }
     90 
     91   private:
     92     /// Kinds of comparisons in the compare instructions.
     93     struct Comparison {
     94       enum Kind {
     95         EQ  = 0x01,
     96         NE  = 0x02,
     97         L   = 0x04, // Less-than property.
     98         G   = 0x08, // Greater-than property.
     99         U   = 0x40, // Unsigned property.
    100         LTs = L,
    101         LEs = L | EQ,
    102         GTs = G,
    103         GEs = G | EQ,
    104         LTu = L      | U,
    105         LEu = L | EQ | U,
    106         GTu = G      | U,
    107         GEu = G | EQ | U
    108       };
    109 
    110       static Kind getSwappedComparison(Kind Cmp) {
    111         assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
    112         if ((Cmp & L) || (Cmp & G))
    113           return (Kind)(Cmp ^ (L|G));
    114         return Cmp;
    115       }
    116     };
    117 
    118     /// \brief Find the register that contains the loop controlling
    119     /// induction variable.
    120     /// If successful, it will return true and set the \p Reg, \p IVBump
    121     /// and \p IVOp arguments.  Otherwise it will return false.
    122     /// The returned induction register is the register R that follows the
    123     /// following induction pattern:
    124     /// loop:
    125     ///   R = phi ..., [ R.next, LatchBlock ]
    126     ///   R.next = R + #bump
    127     ///   if (R.next < #N) goto loop
    128     /// IVBump is the immediate value added to R, and IVOp is the instruction
    129     /// "R.next = R + #bump".
    130     bool findInductionRegister(MachineLoop *L, unsigned &Reg,
    131                                int64_t &IVBump, MachineInstr *&IVOp) const;
    132 
    133     /// \brief Analyze the statements in a loop to determine if the loop
    134     /// has a computable trip count and, if so, return a value that represents
    135     /// the trip count expression.
    136     CountValue *getLoopTripCount(MachineLoop *L,
    137                                  SmallVectorImpl<MachineInstr *> &OldInsts);
    138 
    139     /// \brief Return the expression that represents the number of times
    140     /// a loop iterates.  The function takes the operands that represent the
    141     /// loop start value, loop end value, and induction value.  Based upon
    142     /// these operands, the function attempts to compute the trip count.
    143     /// If the trip count is not directly available (as an immediate value,
    144     /// or a register), the function will attempt to insert computation of it
    145     /// to the loop's preheader.
    146     CountValue *computeCount(MachineLoop *Loop,
    147                              const MachineOperand *Start,
    148                              const MachineOperand *End,
    149                              unsigned IVReg,
    150                              int64_t IVBump,
    151                              Comparison::Kind Cmp) const;
    152 
    153     /// \brief Return true if the instruction is not valid within a hardware
    154     /// loop.
    155     bool isInvalidLoopOperation(const MachineInstr *MI) const;
    156 
    157     /// \brief Return true if the loop contains an instruction that inhibits
    158     /// using the hardware loop.
    159     bool containsInvalidInstruction(MachineLoop *L) const;
    160 
    161     /// \brief Given a loop, check if we can convert it to a hardware loop.
    162     /// If so, then perform the conversion and return true.
    163     bool convertToHardwareLoop(MachineLoop *L);
    164 
    165     /// \brief Return true if the instruction is now dead.
    166     bool isDead(const MachineInstr *MI,
    167                 SmallVectorImpl<MachineInstr *> &DeadPhis) const;
    168 
    169     /// \brief Remove the instruction if it is now dead.
    170     void removeIfDead(MachineInstr *MI);
    171 
    172     /// \brief Make sure that the "bump" instruction executes before the
    173     /// compare.  We need that for the IV fixup, so that the compare
    174     /// instruction would not use a bumped value that has not yet been
    175     /// defined.  If the instructions are out of order, try to reorder them.
    176     bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
    177 
    178     /// \brief Get the instruction that loads an immediate value into \p R,
    179     /// or 0 if such an instruction does not exist.
    180     MachineInstr *defWithImmediate(unsigned R);
    181 
    182     /// \brief Get the immediate value referenced to by \p MO, either for
    183     /// immediate operands, or for register operands, where the register
    184     /// was defined with an immediate value.
    185     int64_t getImmediate(MachineOperand &MO);
    186 
    187     /// \brief Reset the given machine operand to now refer to a new immediate
    188     /// value.  Assumes that the operand was already referencing an immediate
    189     /// value, either directly, or via a register.
    190     void setImmediate(MachineOperand &MO, int64_t Val);
    191 
    192     /// \brief Fix the data flow of the induction varible.
    193     /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
    194     ///                                     |
    195     ///                                     +-> back to phi
    196     /// where "bump" is the increment of the induction variable:
    197     ///   iv = iv + #const.
    198     /// Due to some prior code transformations, the actual flow may look
    199     /// like this:
    200     ///   phi -+-> bump ---> back to phi
    201     ///        |
    202     ///        +-> comparison-in-latch (against upper_bound-bump),
    203     /// i.e. the comparison that controls the loop execution may be using
    204     /// the value of the induction variable from before the increment.
    205     ///
    206     /// Return true if the loop's flow is the desired one (i.e. it's
    207     /// either been fixed, or no fixing was necessary).
    208     /// Otherwise, return false.  This can happen if the induction variable
    209     /// couldn't be identified, or if the value in the latch's comparison
    210     /// cannot be adjusted to reflect the post-bump value.
    211     bool fixupInductionVariable(MachineLoop *L);
    212 
    213     /// \brief Given a loop, if it does not have a preheader, create one.
    214     /// Return the block that is the preheader.
    215     MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
    216   };
    217 
    218   char HexagonHardwareLoops::ID = 0;
    219 #ifndef NDEBUG
    220   int HexagonHardwareLoops::Counter = 0;
    221 #endif
    222 
    223   /// \brief Abstraction for a trip count of a loop. A smaller vesrsion
    224   /// of the MachineOperand class without the concerns of changing the
    225   /// operand representation.
    226   class CountValue {
    227   public:
    228     enum CountValueType {
    229       CV_Register,
    230       CV_Immediate
    231     };
    232   private:
    233     CountValueType Kind;
    234     union Values {
    235       struct {
    236         unsigned Reg;
    237         unsigned Sub;
    238       } R;
    239       unsigned ImmVal;
    240     } Contents;
    241 
    242   public:
    243     explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
    244       Kind = t;
    245       if (Kind == CV_Register) {
    246         Contents.R.Reg = v;
    247         Contents.R.Sub = u;
    248       } else {
    249         Contents.ImmVal = v;
    250       }
    251     }
    252     bool isReg() const { return Kind == CV_Register; }
    253     bool isImm() const { return Kind == CV_Immediate; }
    254 
    255     unsigned getReg() const {
    256       assert(isReg() && "Wrong CountValue accessor");
    257       return Contents.R.Reg;
    258     }
    259     unsigned getSubReg() const {
    260       assert(isReg() && "Wrong CountValue accessor");
    261       return Contents.R.Sub;
    262     }
    263     unsigned getImm() const {
    264       assert(isImm() && "Wrong CountValue accessor");
    265       return Contents.ImmVal;
    266     }
    267 
    268     void print(raw_ostream &OS, const TargetMachine *TM = nullptr) const {
    269       const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : nullptr;
    270       if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
    271       if (isImm()) { OS << Contents.ImmVal; }
    272     }
    273   };
    274 } // end anonymous namespace
    275 
    276 
    277 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
    278                       "Hexagon Hardware Loops", false, false)
    279 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    280 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    281 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
    282                     "Hexagon Hardware Loops", false, false)
    283 
    284 
    285 /// \brief Returns true if the instruction is a hardware loop instruction.
    286 static bool isHardwareLoop(const MachineInstr *MI) {
    287   return MI->getOpcode() == Hexagon::LOOP0_r ||
    288     MI->getOpcode() == Hexagon::LOOP0_i;
    289 }
    290 
    291 FunctionPass *llvm::createHexagonHardwareLoops() {
    292   return new HexagonHardwareLoops();
    293 }
    294 
    295 
    296 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
    297   DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
    298 
    299   bool Changed = false;
    300 
    301   MLI = &getAnalysis<MachineLoopInfo>();
    302   MRI = &MF.getRegInfo();
    303   MDT = &getAnalysis<MachineDominatorTree>();
    304   TM  = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
    305   TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
    306   TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
    307 
    308   for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
    309        I != E; ++I) {
    310     MachineLoop *L = *I;
    311     if (!L->getParentLoop())
    312       Changed |= convertToHardwareLoop(L);
    313   }
    314 
    315   return Changed;
    316 }
    317 
    318 
    319 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
    320                                                  unsigned &Reg,
    321                                                  int64_t &IVBump,
    322                                                  MachineInstr *&IVOp
    323                                                  ) const {
    324   MachineBasicBlock *Header = L->getHeader();
    325   MachineBasicBlock *Preheader = L->getLoopPreheader();
    326   MachineBasicBlock *Latch = L->getLoopLatch();
    327   if (!Header || !Preheader || !Latch)
    328     return false;
    329 
    330   // This pair represents an induction register together with an immediate
    331   // value that will be added to it in each loop iteration.
    332   typedef std::pair<unsigned,int64_t> RegisterBump;
    333 
    334   // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
    335   // from an induction operation
    336   //   R.next = R + bump
    337   // where bump is an immediate value.
    338   typedef std::map<unsigned,RegisterBump> InductionMap;
    339 
    340   InductionMap IndMap;
    341 
    342   typedef MachineBasicBlock::instr_iterator instr_iterator;
    343   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
    344        I != E && I->isPHI(); ++I) {
    345     MachineInstr *Phi = &*I;
    346 
    347     // Have a PHI instruction.  Get the operand that corresponds to the
    348     // latch block, and see if is a result of an addition of form "reg+imm",
    349     // where the "reg" is defined by the PHI node we are looking at.
    350     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
    351       if (Phi->getOperand(i+1).getMBB() != Latch)
    352         continue;
    353 
    354       unsigned PhiOpReg = Phi->getOperand(i).getReg();
    355       MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
    356       unsigned UpdOpc = DI->getOpcode();
    357       bool isAdd = (UpdOpc == Hexagon::ADD_ri);
    358 
    359       if (isAdd) {
    360         // If the register operand to the add is the PHI we're
    361         // looking at, this meets the induction pattern.
    362         unsigned IndReg = DI->getOperand(1).getReg();
    363         if (MRI->getVRegDef(IndReg) == Phi) {
    364           unsigned UpdReg = DI->getOperand(0).getReg();
    365           int64_t V = DI->getOperand(2).getImm();
    366           IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
    367         }
    368       }
    369     }  // for (i)
    370   }  // for (instr)
    371 
    372   SmallVector<MachineOperand,2> Cond;
    373   MachineBasicBlock *TB = nullptr, *FB = nullptr;
    374   bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
    375   if (NotAnalyzed)
    376     return false;
    377 
    378   unsigned CSz = Cond.size();
    379   assert (CSz == 1 || CSz == 2);
    380   unsigned PredR = Cond[CSz-1].getReg();
    381 
    382   MachineInstr *PredI = MRI->getVRegDef(PredR);
    383   if (!PredI->isCompare())
    384     return false;
    385 
    386   unsigned CmpReg1 = 0, CmpReg2 = 0;
    387   int CmpImm = 0, CmpMask = 0;
    388   bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
    389                                          CmpMask, CmpImm);
    390   // Fail if the compare was not analyzed, or it's not comparing a register
    391   // with an immediate value.  Not checking the mask here, since we handle
    392   // the individual compare opcodes (including CMPb) later on.
    393   if (!CmpAnalyzed)
    394     return false;
    395 
    396   // Exactly one of the input registers to the comparison should be among
    397   // the induction registers.
    398   InductionMap::iterator IndMapEnd = IndMap.end();
    399   InductionMap::iterator F = IndMapEnd;
    400   if (CmpReg1 != 0) {
    401     InductionMap::iterator F1 = IndMap.find(CmpReg1);
    402     if (F1 != IndMapEnd)
    403       F = F1;
    404   }
    405   if (CmpReg2 != 0) {
    406     InductionMap::iterator F2 = IndMap.find(CmpReg2);
    407     if (F2 != IndMapEnd) {
    408       if (F != IndMapEnd)
    409         return false;
    410       F = F2;
    411     }
    412   }
    413   if (F == IndMapEnd)
    414     return false;
    415 
    416   Reg = F->second.first;
    417   IVBump = F->second.second;
    418   IVOp = MRI->getVRegDef(F->first);
    419   return true;
    420 }
    421 
    422 
    423 /// \brief Analyze the statements in a loop to determine if the loop has
    424 /// a computable trip count and, if so, return a value that represents
    425 /// the trip count expression.
    426 ///
    427 /// This function iterates over the phi nodes in the loop to check for
    428 /// induction variable patterns that are used in the calculation for
    429 /// the number of time the loop is executed.
    430 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
    431                                     SmallVectorImpl<MachineInstr *> &OldInsts) {
    432   MachineBasicBlock *TopMBB = L->getTopBlock();
    433   MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
    434   assert(PI != TopMBB->pred_end() &&
    435          "Loop must have more than one incoming edge!");
    436   MachineBasicBlock *Backedge = *PI++;
    437   if (PI == TopMBB->pred_end())  // dead loop?
    438     return nullptr;
    439   MachineBasicBlock *Incoming = *PI++;
    440   if (PI != TopMBB->pred_end())  // multiple backedges?
    441     return nullptr;
    442 
    443   // Make sure there is one incoming and one backedge and determine which
    444   // is which.
    445   if (L->contains(Incoming)) {
    446     if (L->contains(Backedge))
    447       return nullptr;
    448     std::swap(Incoming, Backedge);
    449   } else if (!L->contains(Backedge))
    450     return nullptr;
    451 
    452   // Look for the cmp instruction to determine if we can get a useful trip
    453   // count.  The trip count can be either a register or an immediate.  The
    454   // location of the value depends upon the type (reg or imm).
    455   MachineBasicBlock *Latch = L->getLoopLatch();
    456   if (!Latch)
    457     return nullptr;
    458 
    459   unsigned IVReg = 0;
    460   int64_t IVBump = 0;
    461   MachineInstr *IVOp;
    462   bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
    463   if (!FoundIV)
    464     return nullptr;
    465 
    466   MachineBasicBlock *Preheader = L->getLoopPreheader();
    467 
    468   MachineOperand *InitialValue = nullptr;
    469   MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
    470   for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
    471     MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
    472     if (MBB == Preheader)
    473       InitialValue = &IV_Phi->getOperand(i);
    474     else if (MBB == Latch)
    475       IVReg = IV_Phi->getOperand(i).getReg();  // Want IV reg after bump.
    476   }
    477   if (!InitialValue)
    478     return nullptr;
    479 
    480   SmallVector<MachineOperand,2> Cond;
    481   MachineBasicBlock *TB = nullptr, *FB = nullptr;
    482   bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
    483   if (NotAnalyzed)
    484     return nullptr;
    485 
    486   MachineBasicBlock *Header = L->getHeader();
    487   // TB must be non-null.  If FB is also non-null, one of them must be
    488   // the header.  Otherwise, branch to TB could be exiting the loop, and
    489   // the fall through can go to the header.
    490   assert (TB && "Latch block without a branch?");
    491   assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
    492   if (!TB || (FB && TB != Header && FB != Header))
    493     return nullptr;
    494 
    495   // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
    496   // to put imm(0), followed by P in the vector Cond.
    497   // If TB is not the header, it means that the "not-taken" path must lead
    498   // to the header.
    499   bool Negated = (Cond.size() > 1) ^ (TB != Header);
    500   unsigned PredReg = Cond[Cond.size()-1].getReg();
    501   MachineInstr *CondI = MRI->getVRegDef(PredReg);
    502   unsigned CondOpc = CondI->getOpcode();
    503 
    504   unsigned CmpReg1 = 0, CmpReg2 = 0;
    505   int Mask = 0, ImmValue = 0;
    506   bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
    507                                          Mask, ImmValue);
    508   if (!AnalyzedCmp)
    509     return nullptr;
    510 
    511   // The comparison operator type determines how we compute the loop
    512   // trip count.
    513   OldInsts.push_back(CondI);
    514   OldInsts.push_back(IVOp);
    515 
    516   // Sadly, the following code gets information based on the position
    517   // of the operands in the compare instruction.  This has to be done
    518   // this way, because the comparisons check for a specific relationship
    519   // between the operands (e.g. is-less-than), rather than to find out
    520   // what relationship the operands are in (as on PPC).
    521   Comparison::Kind Cmp;
    522   bool isSwapped = false;
    523   const MachineOperand &Op1 = CondI->getOperand(1);
    524   const MachineOperand &Op2 = CondI->getOperand(2);
    525   const MachineOperand *EndValue = nullptr;
    526 
    527   if (Op1.isReg()) {
    528     if (Op2.isImm() || Op1.getReg() == IVReg)
    529       EndValue = &Op2;
    530     else {
    531       EndValue = &Op1;
    532       isSwapped = true;
    533     }
    534   }
    535 
    536   if (!EndValue)
    537     return nullptr;
    538 
    539   switch (CondOpc) {
    540     case Hexagon::CMPEQri:
    541     case Hexagon::CMPEQrr:
    542       Cmp = !Negated ? Comparison::EQ : Comparison::NE;
    543       break;
    544     case Hexagon::CMPGTUri:
    545     case Hexagon::CMPGTUrr:
    546       Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
    547       break;
    548     case Hexagon::CMPGTri:
    549     case Hexagon::CMPGTrr:
    550       Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
    551       break;
    552     // Very limited support for byte/halfword compares.
    553     case Hexagon::CMPbEQri_V4:
    554     case Hexagon::CMPhEQri_V4: {
    555       if (IVBump != 1)
    556         return nullptr;
    557 
    558       int64_t InitV, EndV;
    559       // Since the comparisons are "ri", the EndValue should be an
    560       // immediate.  Check it just in case.
    561       assert(EndValue->isImm() && "Unrecognized latch comparison");
    562       EndV = EndValue->getImm();
    563       // Allow InitialValue to be a register defined with an immediate.
    564       if (InitialValue->isReg()) {
    565         if (!defWithImmediate(InitialValue->getReg()))
    566           return nullptr;
    567         InitV = getImmediate(*InitialValue);
    568       } else {
    569         assert(InitialValue->isImm());
    570         InitV = InitialValue->getImm();
    571       }
    572       if (InitV >= EndV)
    573         return nullptr;
    574       if (CondOpc == Hexagon::CMPbEQri_V4) {
    575         if (!isInt<8>(InitV) || !isInt<8>(EndV))
    576           return nullptr;
    577       } else {  // Hexagon::CMPhEQri_V4
    578         if (!isInt<16>(InitV) || !isInt<16>(EndV))
    579           return nullptr;
    580       }
    581       Cmp = !Negated ? Comparison::EQ : Comparison::NE;
    582       break;
    583     }
    584     default:
    585       return nullptr;
    586   }
    587 
    588   if (isSwapped)
    589    Cmp = Comparison::getSwappedComparison(Cmp);
    590 
    591   if (InitialValue->isReg()) {
    592     unsigned R = InitialValue->getReg();
    593     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
    594     if (!MDT->properlyDominates(DefBB, Header))
    595       return nullptr;
    596     OldInsts.push_back(MRI->getVRegDef(R));
    597   }
    598   if (EndValue->isReg()) {
    599     unsigned R = EndValue->getReg();
    600     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
    601     if (!MDT->properlyDominates(DefBB, Header))
    602       return nullptr;
    603   }
    604 
    605   return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
    606 }
    607 
    608 /// \brief Helper function that returns the expression that represents the
    609 /// number of times a loop iterates.  The function takes the operands that
    610 /// represent the loop start value, loop end value, and induction value.
    611 /// Based upon these operands, the function attempts to compute the trip count.
    612 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
    613                                                const MachineOperand *Start,
    614                                                const MachineOperand *End,
    615                                                unsigned IVReg,
    616                                                int64_t IVBump,
    617                                                Comparison::Kind Cmp) const {
    618   // Cannot handle comparison EQ, i.e. while (A == B).
    619   if (Cmp == Comparison::EQ)
    620     return nullptr;
    621 
    622   // Check if either the start or end values are an assignment of an immediate.
    623   // If so, use the immediate value rather than the register.
    624   if (Start->isReg()) {
    625     const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
    626     if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
    627       Start = &StartValInstr->getOperand(1);
    628   }
    629   if (End->isReg()) {
    630     const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
    631     if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
    632       End = &EndValInstr->getOperand(1);
    633   }
    634 
    635   assert (Start->isReg() || Start->isImm());
    636   assert (End->isReg() || End->isImm());
    637 
    638   bool CmpLess =     Cmp & Comparison::L;
    639   bool CmpGreater =  Cmp & Comparison::G;
    640   bool CmpHasEqual = Cmp & Comparison::EQ;
    641 
    642   // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
    643   // If loop executes while iv is "less" with the iv value going down, then
    644   // the iv must wrap.
    645   if (CmpLess && IVBump < 0)
    646     return nullptr;
    647   // If loop executes while iv is "greater" with the iv value going up, then
    648   // the iv must wrap.
    649   if (CmpGreater && IVBump > 0)
    650     return nullptr;
    651 
    652   if (Start->isImm() && End->isImm()) {
    653     // Both, start and end are immediates.
    654     int64_t StartV = Start->getImm();
    655     int64_t EndV = End->getImm();
    656     int64_t Dist = EndV - StartV;
    657     if (Dist == 0)
    658       return nullptr;
    659 
    660     bool Exact = (Dist % IVBump) == 0;
    661 
    662     if (Cmp == Comparison::NE) {
    663       if (!Exact)
    664         return nullptr;
    665       if ((Dist < 0) ^ (IVBump < 0))
    666         return nullptr;
    667     }
    668 
    669     // For comparisons that include the final value (i.e. include equality
    670     // with the final value), we need to increase the distance by 1.
    671     if (CmpHasEqual)
    672       Dist = Dist > 0 ? Dist+1 : Dist-1;
    673 
    674     // assert (CmpLess => Dist > 0);
    675     assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
    676     // assert (CmpGreater => Dist < 0);
    677     assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
    678 
    679     // "Normalized" distance, i.e. with the bump set to +-1.
    680     int64_t Dist1 = (IVBump > 0) ? (Dist +  (IVBump-1)) /   IVBump
    681                                :  (-Dist + (-IVBump-1)) / (-IVBump);
    682     assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
    683 
    684     uint64_t Count = Dist1;
    685 
    686     if (Count > 0xFFFFFFFFULL)
    687       return nullptr;
    688 
    689     return new CountValue(CountValue::CV_Immediate, Count);
    690   }
    691 
    692   // A general case: Start and End are some values, but the actual
    693   // iteration count may not be available.  If it is not, insert
    694   // a computation of it into the preheader.
    695 
    696   // If the induction variable bump is not a power of 2, quit.
    697   // Othwerise we'd need a general integer division.
    698   if (!isPowerOf2_64(abs64(IVBump)))
    699     return nullptr;
    700 
    701   MachineBasicBlock *PH = Loop->getLoopPreheader();
    702   assert (PH && "Should have a preheader by now");
    703   MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
    704   DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
    705                                          : DebugLoc();
    706 
    707   // If Start is an immediate and End is a register, the trip count
    708   // will be "reg - imm".  Hexagon's "subtract immediate" instruction
    709   // is actually "reg + -imm".
    710 
    711   // If the loop IV is going downwards, i.e. if the bump is negative,
    712   // then the iteration count (computed as End-Start) will need to be
    713   // negated.  To avoid the negation, just swap Start and End.
    714   if (IVBump < 0) {
    715     std::swap(Start, End);
    716     IVBump = -IVBump;
    717   }
    718   // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
    719   // Signedness, and "including equality" are preserved.
    720 
    721   bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
    722   bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
    723 
    724   int64_t StartV = 0, EndV = 0;
    725   if (Start->isImm())
    726     StartV = Start->getImm();
    727   if (End->isImm())
    728     EndV = End->getImm();
    729 
    730   int64_t AdjV = 0;
    731   // To compute the iteration count, we would need this computation:
    732   //   Count = (End - Start + (IVBump-1)) / IVBump
    733   // or, when CmpHasEqual:
    734   //   Count = (End - Start + (IVBump-1)+1) / IVBump
    735   // The "IVBump-1" part is the adjustment (AdjV).  We can avoid
    736   // generating an instruction specifically to add it if we can adjust
    737   // the immediate values for Start or End.
    738 
    739   if (CmpHasEqual) {
    740     // Need to add 1 to the total iteration count.
    741     if (Start->isImm())
    742       StartV--;
    743     else if (End->isImm())
    744       EndV++;
    745     else
    746       AdjV += 1;
    747   }
    748 
    749   if (Cmp != Comparison::NE) {
    750     if (Start->isImm())
    751       StartV -= (IVBump-1);
    752     else if (End->isImm())
    753       EndV += (IVBump-1);
    754     else
    755       AdjV += (IVBump-1);
    756   }
    757 
    758   unsigned R = 0, SR = 0;
    759   if (Start->isReg()) {
    760     R = Start->getReg();
    761     SR = Start->getSubReg();
    762   } else {
    763     R = End->getReg();
    764     SR = End->getSubReg();
    765   }
    766   const TargetRegisterClass *RC = MRI->getRegClass(R);
    767   // Hardware loops cannot handle 64-bit registers.  If it's a double
    768   // register, it has to have a subregister.
    769   if (!SR && RC == &Hexagon::DoubleRegsRegClass)
    770     return nullptr;
    771   const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
    772 
    773   // Compute DistR (register with the distance between Start and End).
    774   unsigned DistR, DistSR;
    775 
    776   // Avoid special case, where the start value is an imm(0).
    777   if (Start->isImm() && StartV == 0) {
    778     DistR = End->getReg();
    779     DistSR = End->getSubReg();
    780   } else {
    781     const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
    782                               (RegToImm ? TII->get(Hexagon::SUB_ri) :
    783                                           TII->get(Hexagon::ADD_ri));
    784     unsigned SubR = MRI->createVirtualRegister(IntRC);
    785     MachineInstrBuilder SubIB =
    786       BuildMI(*PH, InsertPos, DL, SubD, SubR);
    787 
    788     if (RegToReg) {
    789       SubIB.addReg(End->getReg(), 0, End->getSubReg())
    790            .addReg(Start->getReg(), 0, Start->getSubReg());
    791     } else if (RegToImm) {
    792       SubIB.addImm(EndV)
    793            .addReg(Start->getReg(), 0, Start->getSubReg());
    794     } else { // ImmToReg
    795       SubIB.addReg(End->getReg(), 0, End->getSubReg())
    796            .addImm(-StartV);
    797     }
    798     DistR = SubR;
    799     DistSR = 0;
    800   }
    801 
    802   // From DistR, compute AdjR (register with the adjusted distance).
    803   unsigned AdjR, AdjSR;
    804 
    805   if (AdjV == 0) {
    806     AdjR = DistR;
    807     AdjSR = DistSR;
    808   } else {
    809     // Generate CountR = ADD DistR, AdjVal
    810     unsigned AddR = MRI->createVirtualRegister(IntRC);
    811     const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
    812     BuildMI(*PH, InsertPos, DL, AddD, AddR)
    813       .addReg(DistR, 0, DistSR)
    814       .addImm(AdjV);
    815 
    816     AdjR = AddR;
    817     AdjSR = 0;
    818   }
    819 
    820   // From AdjR, compute CountR (register with the final count).
    821   unsigned CountR, CountSR;
    822 
    823   if (IVBump == 1) {
    824     CountR = AdjR;
    825     CountSR = AdjSR;
    826   } else {
    827     // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
    828     unsigned Shift = Log2_32(IVBump);
    829 
    830     // Generate NormR = LSR DistR, Shift.
    831     unsigned LsrR = MRI->createVirtualRegister(IntRC);
    832     const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
    833     BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
    834       .addReg(AdjR, 0, AdjSR)
    835       .addImm(Shift);
    836 
    837     CountR = LsrR;
    838     CountSR = 0;
    839   }
    840 
    841   return new CountValue(CountValue::CV_Register, CountR, CountSR);
    842 }
    843 
    844 
    845 /// \brief Return true if the operation is invalid within hardware loop.
    846 bool HexagonHardwareLoops::isInvalidLoopOperation(
    847       const MachineInstr *MI) const {
    848 
    849   // call is not allowed because the callee may use a hardware loop
    850   if (MI->getDesc().isCall())
    851     return true;
    852 
    853   // do not allow nested hardware loops
    854   if (isHardwareLoop(MI))
    855     return true;
    856 
    857   // check if the instruction defines a hardware loop register
    858   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    859     const MachineOperand &MO = MI->getOperand(i);
    860     if (!MO.isReg() || !MO.isDef())
    861       continue;
    862     unsigned R = MO.getReg();
    863     if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
    864         R == Hexagon::SA0 || R == Hexagon::SA1)
    865       return true;
    866   }
    867   return false;
    868 }
    869 
    870 
    871 /// \brief - Return true if the loop contains an instruction that inhibits
    872 /// the use of the hardware loop function.
    873 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
    874   const std::vector<MachineBasicBlock *> &Blocks = L->getBlocks();
    875   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    876     MachineBasicBlock *MBB = Blocks[i];
    877     for (MachineBasicBlock::iterator
    878            MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
    879       const MachineInstr *MI = &*MII;
    880       if (isInvalidLoopOperation(MI))
    881         return true;
    882     }
    883   }
    884   return false;
    885 }
    886 
    887 
    888 /// \brief Returns true if the instruction is dead.  This was essentially
    889 /// copied from DeadMachineInstructionElim::isDead, but with special cases
    890 /// for inline asm, physical registers and instructions with side effects
    891 /// removed.
    892 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
    893                               SmallVectorImpl<MachineInstr *> &DeadPhis) const {
    894   // Examine each operand.
    895   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    896     const MachineOperand &MO = MI->getOperand(i);
    897     if (!MO.isReg() || !MO.isDef())
    898       continue;
    899 
    900     unsigned Reg = MO.getReg();
    901     if (MRI->use_nodbg_empty(Reg))
    902       continue;
    903 
    904     typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
    905 
    906     // This instruction has users, but if the only user is the phi node for the
    907     // parent block, and the only use of that phi node is this instruction, then
    908     // this instruction is dead: both it (and the phi node) can be removed.
    909     use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
    910     use_nodbg_iterator End = MRI->use_nodbg_end();
    911     if (std::next(I) != End || !I->getParent()->isPHI())
    912       return false;
    913 
    914     MachineInstr *OnePhi = I->getParent();
    915     for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
    916       const MachineOperand &OPO = OnePhi->getOperand(j);
    917       if (!OPO.isReg() || !OPO.isDef())
    918         continue;
    919 
    920       unsigned OPReg = OPO.getReg();
    921       use_nodbg_iterator nextJ;
    922       for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
    923            J != End; J = nextJ) {
    924         nextJ = std::next(J);
    925         MachineOperand &Use = *J;
    926         MachineInstr *UseMI = Use.getParent();
    927 
    928         // If the phi node has a user that is not MI, bail...
    929         if (MI != UseMI)
    930           return false;
    931       }
    932     }
    933     DeadPhis.push_back(OnePhi);
    934   }
    935 
    936   // If there are no defs with uses, the instruction is dead.
    937   return true;
    938 }
    939 
    940 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
    941   // This procedure was essentially copied from DeadMachineInstructionElim.
    942 
    943   SmallVector<MachineInstr*, 1> DeadPhis;
    944   if (isDead(MI, DeadPhis)) {
    945     DEBUG(dbgs() << "HW looping will remove: " << *MI);
    946 
    947     // It is possible that some DBG_VALUE instructions refer to this
    948     // instruction.  Examine each def operand for such references;
    949     // if found, mark the DBG_VALUE as undef (but don't delete it).
    950     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    951       const MachineOperand &MO = MI->getOperand(i);
    952       if (!MO.isReg() || !MO.isDef())
    953         continue;
    954       unsigned Reg = MO.getReg();
    955       MachineRegisterInfo::use_iterator nextI;
    956       for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
    957            E = MRI->use_end(); I != E; I = nextI) {
    958         nextI = std::next(I);  // I is invalidated by the setReg
    959         MachineOperand &Use = *I;
    960         MachineInstr *UseMI = I->getParent();
    961         if (UseMI == MI)
    962           continue;
    963         if (Use.isDebug())
    964           UseMI->getOperand(0).setReg(0U);
    965         // This may also be a "instr -> phi -> instr" case which can
    966         // be removed too.
    967       }
    968     }
    969 
    970     MI->eraseFromParent();
    971     for (unsigned i = 0; i < DeadPhis.size(); ++i)
    972       DeadPhis[i]->eraseFromParent();
    973   }
    974 }
    975 
    976 /// \brief Check if the loop is a candidate for converting to a hardware
    977 /// loop.  If so, then perform the transformation.
    978 ///
    979 /// This function works on innermost loops first.  A loop can be converted
    980 /// if it is a counting loop; either a register value or an immediate.
    981 ///
    982 /// The code makes several assumptions about the representation of the loop
    983 /// in llvm.
    984 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
    985   // This is just for sanity.
    986   assert(L->getHeader() && "Loop without a header?");
    987 
    988   bool Changed = false;
    989   // Process nested loops first.
    990   for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    991     Changed |= convertToHardwareLoop(*I);
    992 
    993   // If a nested loop has been converted, then we can't convert this loop.
    994   if (Changed)
    995     return Changed;
    996 
    997 #ifndef NDEBUG
    998   // Stop trying after reaching the limit (if any).
    999   int Limit = HWLoopLimit;
   1000   if (Limit >= 0) {
   1001     if (Counter >= HWLoopLimit)
   1002       return false;
   1003     Counter++;
   1004   }
   1005 #endif
   1006 
   1007   // Does the loop contain any invalid instructions?
   1008   if (containsInvalidInstruction(L))
   1009     return false;
   1010 
   1011   // Is the induction variable bump feeding the latch condition?
   1012   if (!fixupInductionVariable(L))
   1013     return false;
   1014 
   1015   MachineBasicBlock *LastMBB = L->getExitingBlock();
   1016   // Don't generate hw loop if the loop has more than one exit.
   1017   if (!LastMBB)
   1018     return false;
   1019 
   1020   MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
   1021   if (LastI == LastMBB->end())
   1022     return false;
   1023 
   1024   // Ensure the loop has a preheader: the loop instruction will be
   1025   // placed there.
   1026   bool NewPreheader = false;
   1027   MachineBasicBlock *Preheader = L->getLoopPreheader();
   1028   if (!Preheader) {
   1029     Preheader = createPreheaderForLoop(L);
   1030     if (!Preheader)
   1031       return false;
   1032     NewPreheader = true;
   1033   }
   1034   MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
   1035 
   1036   SmallVector<MachineInstr*, 2> OldInsts;
   1037   // Are we able to determine the trip count for the loop?
   1038   CountValue *TripCount = getLoopTripCount(L, OldInsts);
   1039   if (!TripCount)
   1040     return false;
   1041 
   1042   // Is the trip count available in the preheader?
   1043   if (TripCount->isReg()) {
   1044     // There will be a use of the register inserted into the preheader,
   1045     // so make sure that the register is actually defined at that point.
   1046     MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
   1047     MachineBasicBlock *BBDef = TCDef->getParent();
   1048     if (!NewPreheader) {
   1049       if (!MDT->dominates(BBDef, Preheader))
   1050         return false;
   1051     } else {
   1052       // If we have just created a preheader, the dominator tree won't be
   1053       // aware of it.  Check if the definition of the register dominates
   1054       // the header, but is not the header itself.
   1055       if (!MDT->properlyDominates(BBDef, L->getHeader()))
   1056         return false;
   1057     }
   1058   }
   1059 
   1060   // Determine the loop start.
   1061   MachineBasicBlock *LoopStart = L->getTopBlock();
   1062   if (L->getLoopLatch() != LastMBB) {
   1063     // When the exit and latch are not the same, use the latch block as the
   1064     // start.
   1065     // The loop start address is used only after the 1st iteration, and the
   1066     // loop latch may contains instrs. that need to be executed after the
   1067     // first iteration.
   1068     LoopStart = L->getLoopLatch();
   1069     // Make sure the latch is a successor of the exit, otherwise it won't work.
   1070     if (!LastMBB->isSuccessor(LoopStart))
   1071       return false;
   1072   }
   1073 
   1074   // Convert the loop to a hardware loop.
   1075   DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
   1076   DebugLoc DL;
   1077   if (InsertPos != Preheader->end())
   1078     DL = InsertPos->getDebugLoc();
   1079 
   1080   if (TripCount->isReg()) {
   1081     // Create a copy of the loop count register.
   1082     unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
   1083     BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
   1084       .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
   1085     // Add the Loop instruction to the beginning of the loop.
   1086     BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
   1087       .addMBB(LoopStart)
   1088       .addReg(CountReg);
   1089   } else {
   1090     assert(TripCount->isImm() && "Expecting immediate value for trip count");
   1091     // Add the Loop immediate instruction to the beginning of the loop,
   1092     // if the immediate fits in the instructions.  Otherwise, we need to
   1093     // create a new virtual register.
   1094     int64_t CountImm = TripCount->getImm();
   1095     if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
   1096       unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
   1097       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
   1098         .addImm(CountImm);
   1099       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
   1100         .addMBB(LoopStart).addReg(CountReg);
   1101     } else
   1102       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
   1103         .addMBB(LoopStart).addImm(CountImm);
   1104   }
   1105 
   1106   // Make sure the loop start always has a reference in the CFG.  We need
   1107   // to create a BlockAddress operand to get this mechanism to work both the
   1108   // MachineBasicBlock and BasicBlock objects need the flag set.
   1109   LoopStart->setHasAddressTaken();
   1110   // This line is needed to set the hasAddressTaken flag on the BasicBlock
   1111   // object.
   1112   BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
   1113 
   1114   // Replace the loop branch with an endloop instruction.
   1115   DebugLoc LastIDL = LastI->getDebugLoc();
   1116   BuildMI(*LastMBB, LastI, LastIDL,
   1117           TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
   1118 
   1119   // The loop ends with either:
   1120   //  - a conditional branch followed by an unconditional branch, or
   1121   //  - a conditional branch to the loop start.
   1122   if (LastI->getOpcode() == Hexagon::JMP_t ||
   1123       LastI->getOpcode() == Hexagon::JMP_f) {
   1124     // Delete one and change/add an uncond. branch to out of the loop.
   1125     MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
   1126     LastI = LastMBB->erase(LastI);
   1127     if (!L->contains(BranchTarget)) {
   1128       if (LastI != LastMBB->end())
   1129         LastI = LastMBB->erase(LastI);
   1130       SmallVector<MachineOperand, 0> Cond;
   1131       TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
   1132     }
   1133   } else {
   1134     // Conditional branch to loop start; just delete it.
   1135     LastMBB->erase(LastI);
   1136   }
   1137   delete TripCount;
   1138 
   1139   // The induction operation and the comparison may now be
   1140   // unneeded. If these are unneeded, then remove them.
   1141   for (unsigned i = 0; i < OldInsts.size(); ++i)
   1142     removeIfDead(OldInsts[i]);
   1143 
   1144   ++NumHWLoops;
   1145   return true;
   1146 }
   1147 
   1148 
   1149 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
   1150                                             MachineInstr *CmpI) {
   1151   assert (BumpI != CmpI && "Bump and compare in the same instruction?");
   1152 
   1153   MachineBasicBlock *BB = BumpI->getParent();
   1154   if (CmpI->getParent() != BB)
   1155     return false;
   1156 
   1157   typedef MachineBasicBlock::instr_iterator instr_iterator;
   1158   // Check if things are in order to begin with.
   1159   for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
   1160     if (&*I == CmpI)
   1161       return true;
   1162 
   1163   // Out of order.
   1164   unsigned PredR = CmpI->getOperand(0).getReg();
   1165   bool FoundBump = false;
   1166   instr_iterator CmpIt = CmpI, NextIt = std::next(CmpIt);
   1167   for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
   1168     MachineInstr *In = &*I;
   1169     for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
   1170       MachineOperand &MO = In->getOperand(i);
   1171       if (MO.isReg() && MO.isUse()) {
   1172         if (MO.getReg() == PredR)  // Found an intervening use of PredR.
   1173           return false;
   1174       }
   1175     }
   1176 
   1177     if (In == BumpI) {
   1178       instr_iterator After = BumpI;
   1179       instr_iterator From = CmpI;
   1180       BB->splice(std::next(After), BB, From);
   1181       FoundBump = true;
   1182       break;
   1183     }
   1184   }
   1185   assert (FoundBump && "Cannot determine instruction order");
   1186   return FoundBump;
   1187 }
   1188 
   1189 
   1190 MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
   1191   MachineInstr *DI = MRI->getVRegDef(R);
   1192   unsigned DOpc = DI->getOpcode();
   1193   switch (DOpc) {
   1194     case Hexagon::TFRI:
   1195     case Hexagon::TFRI64:
   1196     case Hexagon::CONST32_Int_Real:
   1197     case Hexagon::CONST64_Int_Real:
   1198       return DI;
   1199   }
   1200   return nullptr;
   1201 }
   1202 
   1203 
   1204 int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
   1205   if (MO.isImm())
   1206     return MO.getImm();
   1207   assert(MO.isReg());
   1208   unsigned R = MO.getReg();
   1209   MachineInstr *DI = defWithImmediate(R);
   1210   assert(DI && "Need an immediate operand");
   1211   // All currently supported "define-with-immediate" instructions have the
   1212   // actual immediate value in the operand(1).
   1213   int64_t v = DI->getOperand(1).getImm();
   1214   return v;
   1215 }
   1216 
   1217 
   1218 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
   1219   if (MO.isImm()) {
   1220     MO.setImm(Val);
   1221     return;
   1222   }
   1223 
   1224   assert(MO.isReg());
   1225   unsigned R = MO.getReg();
   1226   MachineInstr *DI = defWithImmediate(R);
   1227   if (MRI->hasOneNonDBGUse(R)) {
   1228     // If R has only one use, then just change its defining instruction to
   1229     // the new immediate value.
   1230     DI->getOperand(1).setImm(Val);
   1231     return;
   1232   }
   1233 
   1234   const TargetRegisterClass *RC = MRI->getRegClass(R);
   1235   unsigned NewR = MRI->createVirtualRegister(RC);
   1236   MachineBasicBlock &B = *DI->getParent();
   1237   DebugLoc DL = DI->getDebugLoc();
   1238   BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
   1239     .addImm(Val);
   1240   MO.setReg(NewR);
   1241 }
   1242 
   1243 
   1244 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
   1245   MachineBasicBlock *Header = L->getHeader();
   1246   MachineBasicBlock *Preheader = L->getLoopPreheader();
   1247   MachineBasicBlock *Latch = L->getLoopLatch();
   1248 
   1249   if (!Header || !Preheader || !Latch)
   1250     return false;
   1251 
   1252   // These data structures follow the same concept as the corresponding
   1253   // ones in findInductionRegister (where some comments are).
   1254   typedef std::pair<unsigned,int64_t> RegisterBump;
   1255   typedef std::pair<unsigned,RegisterBump> RegisterInduction;
   1256   typedef std::set<RegisterInduction> RegisterInductionSet;
   1257 
   1258   // Register candidates for induction variables, with their associated bumps.
   1259   RegisterInductionSet IndRegs;
   1260 
   1261   // Look for induction patterns:
   1262   //   vreg1 = PHI ..., [ latch, vreg2 ]
   1263   //   vreg2 = ADD vreg1, imm
   1264   typedef MachineBasicBlock::instr_iterator instr_iterator;
   1265   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
   1266        I != E && I->isPHI(); ++I) {
   1267     MachineInstr *Phi = &*I;
   1268 
   1269     // Have a PHI instruction.
   1270     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
   1271       if (Phi->getOperand(i+1).getMBB() != Latch)
   1272         continue;
   1273 
   1274       unsigned PhiReg = Phi->getOperand(i).getReg();
   1275       MachineInstr *DI = MRI->getVRegDef(PhiReg);
   1276       unsigned UpdOpc = DI->getOpcode();
   1277       bool isAdd = (UpdOpc == Hexagon::ADD_ri);
   1278 
   1279       if (isAdd) {
   1280         // If the register operand to the add/sub is the PHI we are looking
   1281         // at, this meets the induction pattern.
   1282         unsigned IndReg = DI->getOperand(1).getReg();
   1283         if (MRI->getVRegDef(IndReg) == Phi) {
   1284           unsigned UpdReg = DI->getOperand(0).getReg();
   1285           int64_t V = DI->getOperand(2).getImm();
   1286           IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
   1287         }
   1288       }
   1289     }  // for (i)
   1290   }  // for (instr)
   1291 
   1292   if (IndRegs.empty())
   1293     return false;
   1294 
   1295   MachineBasicBlock *TB = nullptr, *FB = nullptr;
   1296   SmallVector<MachineOperand,2> Cond;
   1297   // AnalyzeBranch returns true if it fails to analyze branch.
   1298   bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
   1299   if (NotAnalyzed)
   1300     return false;
   1301 
   1302   // Check if the latch branch is unconditional.
   1303   if (Cond.empty())
   1304     return false;
   1305 
   1306   if (TB != Header && FB != Header)
   1307     // The latch does not go back to the header.  Not a latch we know and love.
   1308     return false;
   1309 
   1310   // Expecting a predicate register as a condition.  It won't be a hardware
   1311   // predicate register at this point yet, just a vreg.
   1312   // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
   1313   // into Cond, followed by the predicate register.  For non-negated branches
   1314   // it's just the register.
   1315   unsigned CSz = Cond.size();
   1316   if (CSz != 1 && CSz != 2)
   1317     return false;
   1318 
   1319   unsigned P = Cond[CSz-1].getReg();
   1320   MachineInstr *PredDef = MRI->getVRegDef(P);
   1321 
   1322   if (!PredDef->isCompare())
   1323     return false;
   1324 
   1325   SmallSet<unsigned,2> CmpRegs;
   1326   MachineOperand *CmpImmOp = nullptr;
   1327 
   1328   // Go over all operands to the compare and look for immediate and register
   1329   // operands.  Assume that if the compare has a single register use and a
   1330   // single immediate operand, then the register is being compared with the
   1331   // immediate value.
   1332   for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
   1333     MachineOperand &MO = PredDef->getOperand(i);
   1334     if (MO.isReg()) {
   1335       // Skip all implicit references.  In one case there was:
   1336       //   %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
   1337       if (MO.isImplicit())
   1338         continue;
   1339       if (MO.isUse()) {
   1340         unsigned R = MO.getReg();
   1341         if (!defWithImmediate(R)) {
   1342           CmpRegs.insert(MO.getReg());
   1343           continue;
   1344         }
   1345         // Consider the register to be the "immediate" operand.
   1346         if (CmpImmOp)
   1347           return false;
   1348         CmpImmOp = &MO;
   1349       }
   1350     } else if (MO.isImm()) {
   1351       if (CmpImmOp)    // A second immediate argument?  Confusing.  Bail out.
   1352         return false;
   1353       CmpImmOp = &MO;
   1354     }
   1355   }
   1356 
   1357   if (CmpRegs.empty())
   1358     return false;
   1359 
   1360   // Check if the compared register follows the order we want.  Fix if needed.
   1361   for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
   1362        I != E; ++I) {
   1363     // This is a success.  If the register used in the comparison is one that
   1364     // we have identified as a bumped (updated) induction register, there is
   1365     // nothing to do.
   1366     if (CmpRegs.count(I->first))
   1367       return true;
   1368 
   1369     // Otherwise, if the register being compared comes out of a PHI node,
   1370     // and has been recognized as following the induction pattern, and is
   1371     // compared against an immediate, we can fix it.
   1372     const RegisterBump &RB = I->second;
   1373     if (CmpRegs.count(RB.first)) {
   1374       if (!CmpImmOp)
   1375         return false;
   1376 
   1377       int64_t CmpImm = getImmediate(*CmpImmOp);
   1378       int64_t V = RB.second;
   1379       if (V > 0 && CmpImm+V < CmpImm)  // Overflow (64-bit).
   1380         return false;
   1381       if (V < 0 && CmpImm+V > CmpImm)  // Overflow (64-bit).
   1382         return false;
   1383       CmpImm += V;
   1384       // Some forms of cmp-immediate allow u9 and s10.  Assume the worst case
   1385       // scenario, i.e. an 8-bit value.
   1386       if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
   1387         return false;
   1388 
   1389       // Make sure that the compare happens after the bump.  Otherwise,
   1390       // after the fixup, the compare would use a yet-undefined register.
   1391       MachineInstr *BumpI = MRI->getVRegDef(I->first);
   1392       bool Order = orderBumpCompare(BumpI, PredDef);
   1393       if (!Order)
   1394         return false;
   1395 
   1396       // Finally, fix the compare instruction.
   1397       setImmediate(*CmpImmOp, CmpImm);
   1398       for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
   1399         MachineOperand &MO = PredDef->getOperand(i);
   1400         if (MO.isReg() && MO.getReg() == RB.first) {
   1401           MO.setReg(I->first);
   1402           return true;
   1403         }
   1404       }
   1405     }
   1406   }
   1407 
   1408   return false;
   1409 }
   1410 
   1411 
   1412 /// \brief Create a preheader for a given loop.
   1413 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
   1414       MachineLoop *L) {
   1415   if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
   1416     return TmpPH;
   1417 
   1418   MachineBasicBlock *Header = L->getHeader();
   1419   MachineBasicBlock *Latch = L->getLoopLatch();
   1420   MachineFunction *MF = Header->getParent();
   1421   DebugLoc DL;
   1422 
   1423   if (!Latch || Header->hasAddressTaken())
   1424     return nullptr;
   1425 
   1426   typedef MachineBasicBlock::instr_iterator instr_iterator;
   1427 
   1428   // Verify that all existing predecessors have analyzable branches
   1429   // (or no branches at all).
   1430   typedef std::vector<MachineBasicBlock*> MBBVector;
   1431   MBBVector Preds(Header->pred_begin(), Header->pred_end());
   1432   SmallVector<MachineOperand,2> Tmp1;
   1433   MachineBasicBlock *TB = nullptr, *FB = nullptr;
   1434 
   1435   if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
   1436     return nullptr;
   1437 
   1438   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
   1439     MachineBasicBlock *PB = *I;
   1440     if (PB != Latch) {
   1441       bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
   1442       if (NotAnalyzed)
   1443         return nullptr;
   1444     }
   1445   }
   1446 
   1447   MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
   1448   MF->insert(Header, NewPH);
   1449 
   1450   if (Header->pred_size() > 2) {
   1451     // Ensure that the header has only two predecessors: the preheader and
   1452     // the loop latch.  Any additional predecessors of the header should
   1453     // join at the newly created preheader.  Inspect all PHI nodes from the
   1454     // header and create appropriate corresponding PHI nodes in the preheader.
   1455 
   1456     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
   1457          I != E && I->isPHI(); ++I) {
   1458       MachineInstr *PN = &*I;
   1459 
   1460       const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
   1461       MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
   1462       NewPH->insert(NewPH->end(), NewPN);
   1463 
   1464       unsigned PR = PN->getOperand(0).getReg();
   1465       const TargetRegisterClass *RC = MRI->getRegClass(PR);
   1466       unsigned NewPR = MRI->createVirtualRegister(RC);
   1467       NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
   1468 
   1469       // Copy all non-latch operands of a header's PHI node to the newly
   1470       // created PHI node in the preheader.
   1471       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
   1472         unsigned PredR = PN->getOperand(i).getReg();
   1473         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
   1474         if (PredB == Latch)
   1475           continue;
   1476 
   1477         NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
   1478         NewPN->addOperand(MachineOperand::CreateMBB(PredB));
   1479       }
   1480 
   1481       // Remove copied operands from the old PHI node and add the value
   1482       // coming from the preheader's PHI.
   1483       for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
   1484         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
   1485         if (PredB != Latch) {
   1486           PN->RemoveOperand(i+1);
   1487           PN->RemoveOperand(i);
   1488         }
   1489       }
   1490       PN->addOperand(MachineOperand::CreateReg(NewPR, false));
   1491       PN->addOperand(MachineOperand::CreateMBB(NewPH));
   1492     }
   1493 
   1494   } else {
   1495     assert(Header->pred_size() == 2);
   1496 
   1497     // The header has only two predecessors, but the non-latch predecessor
   1498     // is not a preheader (e.g. it has other successors, etc.)
   1499     // In such a case we don't need any extra PHI nodes in the new preheader,
   1500     // all we need is to adjust existing PHIs in the header to now refer to
   1501     // the new preheader.
   1502     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
   1503          I != E && I->isPHI(); ++I) {
   1504       MachineInstr *PN = &*I;
   1505       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
   1506         MachineOperand &MO = PN->getOperand(i+1);
   1507         if (MO.getMBB() != Latch)
   1508           MO.setMBB(NewPH);
   1509       }
   1510     }
   1511   }
   1512 
   1513   // "Reroute" the CFG edges to link in the new preheader.
   1514   // If any of the predecessors falls through to the header, insert a branch
   1515   // to the new preheader in that place.
   1516   SmallVector<MachineOperand,1> Tmp2;
   1517   SmallVector<MachineOperand,1> EmptyCond;
   1518 
   1519   TB = FB = nullptr;
   1520 
   1521   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
   1522     MachineBasicBlock *PB = *I;
   1523     if (PB != Latch) {
   1524       Tmp2.clear();
   1525       bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
   1526       (void)NotAnalyzed; // suppress compiler warning
   1527       assert (!NotAnalyzed && "Should be analyzable!");
   1528       if (TB != Header && (Tmp2.empty() || FB != Header))
   1529         TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
   1530       PB->ReplaceUsesOfBlockWith(Header, NewPH);
   1531     }
   1532   }
   1533 
   1534   // It can happen that the latch block will fall through into the header.
   1535   // Insert an unconditional branch to the header.
   1536   TB = FB = nullptr;
   1537   bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
   1538   (void)LatchNotAnalyzed; // suppress compiler warning
   1539   assert (!LatchNotAnalyzed && "Should be analyzable!");
   1540   if (!TB && !FB)
   1541     TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL);
   1542 
   1543   // Finally, the branch from the preheader to the header.
   1544   TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
   1545   NewPH->addSuccessor(Header);
   1546 
   1547   return NewPH;
   1548 }
   1549