Home | History | Annotate | Download | only in Hexagon
      1 //===-- HexagonHardwareLoops.cpp - Identify and generate hardware loops ---===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass identifies loops where we can generate the Hexagon hardware
     11 // loop instruction.  The hardware loop can perform loop branches with a
     12 // zero-cycle overhead.
     13 //
     14 // The pattern that defines the induction variable can changed depending on
     15 // prior optimizations.  For example, the IndVarSimplify phase run by 'opt'
     16 // normalizes induction variables, and the Loop Strength Reduction pass
     17 // run by 'llc' may also make changes to the induction variable.
     18 // The pattern detected by this phase is due to running Strength Reduction.
     19 //
     20 // Criteria for hardware loops:
     21 //  - Countable loops (w/ ind. var for a trip count)
     22 //  - Assumes loops are normalized by IndVarSimplify
     23 //  - Try inner-most loops first
     24 //  - No nested hardware loops.
     25 //  - No function calls in loops.
     26 //
     27 //===----------------------------------------------------------------------===//
     28 
     29 #define DEBUG_TYPE "hwloops"
     30 #include "llvm/ADT/SmallSet.h"
     31 #include "llvm/ADT/Statistic.h"
     32 #include "llvm/CodeGen/MachineDominators.h"
     33 #include "llvm/CodeGen/MachineFunction.h"
     34 #include "llvm/CodeGen/MachineFunctionPass.h"
     35 #include "llvm/CodeGen/MachineInstrBuilder.h"
     36 #include "llvm/CodeGen/MachineLoopInfo.h"
     37 #include "llvm/CodeGen/MachineRegisterInfo.h"
     38 #include "llvm/PassSupport.h"
     39 #include "llvm/Support/CommandLine.h"
     40 #include "llvm/Support/Debug.h"
     41 #include "llvm/Support/raw_ostream.h"
     42 #include "llvm/Target/TargetInstrInfo.h"
     43 #include "Hexagon.h"
     44 #include "HexagonTargetMachine.h"
     45 
     46 #include <algorithm>
     47 #include <vector>
     48 
     49 using namespace llvm;
     50 
     51 #ifndef NDEBUG
     52 static cl::opt<int> HWLoopLimit("max-hwloop", cl::Hidden, cl::init(-1));
     53 #endif
     54 
     55 STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
     56 
     57 namespace llvm {
     58   void initializeHexagonHardwareLoopsPass(PassRegistry&);
     59 }
     60 
     61 namespace {
     62   class CountValue;
     63   struct HexagonHardwareLoops : public MachineFunctionPass {
     64     MachineLoopInfo            *MLI;
     65     MachineRegisterInfo        *MRI;
     66     MachineDominatorTree       *MDT;
     67     const HexagonTargetMachine *TM;
     68     const HexagonInstrInfo     *TII;
     69     const HexagonRegisterInfo  *TRI;
     70 #ifndef NDEBUG
     71     static int Counter;
     72 #endif
     73 
     74   public:
     75     static char ID;
     76 
     77     HexagonHardwareLoops() : MachineFunctionPass(ID) {
     78       initializeHexagonHardwareLoopsPass(*PassRegistry::getPassRegistry());
     79     }
     80 
     81     virtual bool runOnMachineFunction(MachineFunction &MF);
     82 
     83     const char *getPassName() const { return "Hexagon Hardware Loops"; }
     84 
     85     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     86       AU.addRequired<MachineDominatorTree>();
     87       AU.addRequired<MachineLoopInfo>();
     88       MachineFunctionPass::getAnalysisUsage(AU);
     89     }
     90 
     91   private:
     92     /// Kinds of comparisons in the compare instructions.
     93     struct Comparison {
     94       enum Kind {
     95         EQ  = 0x01,
     96         NE  = 0x02,
     97         L   = 0x04, // Less-than property.
     98         G   = 0x08, // Greater-than property.
     99         U   = 0x40, // Unsigned property.
    100         LTs = L,
    101         LEs = L | EQ,
    102         GTs = G,
    103         GEs = G | EQ,
    104         LTu = L      | U,
    105         LEu = L | EQ | U,
    106         GTu = G      | U,
    107         GEu = G | EQ | U
    108       };
    109 
    110       static Kind getSwappedComparison(Kind Cmp) {
    111         assert ((!((Cmp & L) && (Cmp & G))) && "Malformed comparison operator");
    112         if ((Cmp & L) || (Cmp & G))
    113           return (Kind)(Cmp ^ (L|G));
    114         return Cmp;
    115       }
    116     };
    117 
    118     /// \brief Find the register that contains the loop controlling
    119     /// induction variable.
    120     /// If successful, it will return true and set the \p Reg, \p IVBump
    121     /// and \p IVOp arguments.  Otherwise it will return false.
    122     /// The returned induction register is the register R that follows the
    123     /// following induction pattern:
    124     /// loop:
    125     ///   R = phi ..., [ R.next, LatchBlock ]
    126     ///   R.next = R + #bump
    127     ///   if (R.next < #N) goto loop
    128     /// IVBump is the immediate value added to R, and IVOp is the instruction
    129     /// "R.next = R + #bump".
    130     bool findInductionRegister(MachineLoop *L, unsigned &Reg,
    131                                int64_t &IVBump, MachineInstr *&IVOp) const;
    132 
    133     /// \brief Analyze the statements in a loop to determine if the loop
    134     /// has a computable trip count and, if so, return a value that represents
    135     /// the trip count expression.
    136     CountValue *getLoopTripCount(MachineLoop *L,
    137                                  SmallVector<MachineInstr*, 2> &OldInsts);
    138 
    139     /// \brief Return the expression that represents the number of times
    140     /// a loop iterates.  The function takes the operands that represent the
    141     /// loop start value, loop end value, and induction value.  Based upon
    142     /// these operands, the function attempts to compute the trip count.
    143     /// If the trip count is not directly available (as an immediate value,
    144     /// or a register), the function will attempt to insert computation of it
    145     /// to the loop's preheader.
    146     CountValue *computeCount(MachineLoop *Loop,
    147                              const MachineOperand *Start,
    148                              const MachineOperand *End,
    149                              unsigned IVReg,
    150                              int64_t IVBump,
    151                              Comparison::Kind Cmp) const;
    152 
    153     /// \brief Return true if the instruction is not valid within a hardware
    154     /// loop.
    155     bool isInvalidLoopOperation(const MachineInstr *MI) const;
    156 
    157     /// \brief Return true if the loop contains an instruction that inhibits
    158     /// using the hardware loop.
    159     bool containsInvalidInstruction(MachineLoop *L) const;
    160 
    161     /// \brief Given a loop, check if we can convert it to a hardware loop.
    162     /// If so, then perform the conversion and return true.
    163     bool convertToHardwareLoop(MachineLoop *L);
    164 
    165     /// \brief Return true if the instruction is now dead.
    166     bool isDead(const MachineInstr *MI,
    167                 SmallVector<MachineInstr*, 1> &DeadPhis) const;
    168 
    169     /// \brief Remove the instruction if it is now dead.
    170     void removeIfDead(MachineInstr *MI);
    171 
    172     /// \brief Make sure that the "bump" instruction executes before the
    173     /// compare.  We need that for the IV fixup, so that the compare
    174     /// instruction would not use a bumped value that has not yet been
    175     /// defined.  If the instructions are out of order, try to reorder them.
    176     bool orderBumpCompare(MachineInstr *BumpI, MachineInstr *CmpI);
    177 
    178     /// \brief Get the instruction that loads an immediate value into \p R,
    179     /// or 0 if such an instruction does not exist.
    180     MachineInstr *defWithImmediate(unsigned R);
    181 
    182     /// \brief Get the immediate value referenced to by \p MO, either for
    183     /// immediate operands, or for register operands, where the register
    184     /// was defined with an immediate value.
    185     int64_t getImmediate(MachineOperand &MO);
    186 
    187     /// \brief Reset the given machine operand to now refer to a new immediate
    188     /// value.  Assumes that the operand was already referencing an immediate
    189     /// value, either directly, or via a register.
    190     void setImmediate(MachineOperand &MO, int64_t Val);
    191 
    192     /// \brief Fix the data flow of the induction varible.
    193     /// The desired flow is: phi ---> bump -+-> comparison-in-latch.
    194     ///                                     |
    195     ///                                     +-> back to phi
    196     /// where "bump" is the increment of the induction variable:
    197     ///   iv = iv + #const.
    198     /// Due to some prior code transformations, the actual flow may look
    199     /// like this:
    200     ///   phi -+-> bump ---> back to phi
    201     ///        |
    202     ///        +-> comparison-in-latch (against upper_bound-bump),
    203     /// i.e. the comparison that controls the loop execution may be using
    204     /// the value of the induction variable from before the increment.
    205     ///
    206     /// Return true if the loop's flow is the desired one (i.e. it's
    207     /// either been fixed, or no fixing was necessary).
    208     /// Otherwise, return false.  This can happen if the induction variable
    209     /// couldn't be identified, or if the value in the latch's comparison
    210     /// cannot be adjusted to reflect the post-bump value.
    211     bool fixupInductionVariable(MachineLoop *L);
    212 
    213     /// \brief Given a loop, if it does not have a preheader, create one.
    214     /// Return the block that is the preheader.
    215     MachineBasicBlock *createPreheaderForLoop(MachineLoop *L);
    216   };
    217 
    218   char HexagonHardwareLoops::ID = 0;
    219 #ifndef NDEBUG
    220   int HexagonHardwareLoops::Counter = 0;
    221 #endif
    222 
    223   /// \brief Abstraction for a trip count of a loop. A smaller vesrsion
    224   /// of the MachineOperand class without the concerns of changing the
    225   /// operand representation.
    226   class CountValue {
    227   public:
    228     enum CountValueType {
    229       CV_Register,
    230       CV_Immediate
    231     };
    232   private:
    233     CountValueType Kind;
    234     union Values {
    235       struct {
    236         unsigned Reg;
    237         unsigned Sub;
    238       } R;
    239       unsigned ImmVal;
    240     } Contents;
    241 
    242   public:
    243     explicit CountValue(CountValueType t, unsigned v, unsigned u = 0) {
    244       Kind = t;
    245       if (Kind == CV_Register) {
    246         Contents.R.Reg = v;
    247         Contents.R.Sub = u;
    248       } else {
    249         Contents.ImmVal = v;
    250       }
    251     }
    252     bool isReg() const { return Kind == CV_Register; }
    253     bool isImm() const { return Kind == CV_Immediate; }
    254 
    255     unsigned getReg() const {
    256       assert(isReg() && "Wrong CountValue accessor");
    257       return Contents.R.Reg;
    258     }
    259     unsigned getSubReg() const {
    260       assert(isReg() && "Wrong CountValue accessor");
    261       return Contents.R.Sub;
    262     }
    263     unsigned getImm() const {
    264       assert(isImm() && "Wrong CountValue accessor");
    265       return Contents.ImmVal;
    266     }
    267 
    268     void print(raw_ostream &OS, const TargetMachine *TM = 0) const {
    269       const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
    270       if (isReg()) { OS << PrintReg(Contents.R.Reg, TRI, Contents.R.Sub); }
    271       if (isImm()) { OS << Contents.ImmVal; }
    272     }
    273   };
    274 } // end anonymous namespace
    275 
    276 
    277 INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
    278                       "Hexagon Hardware Loops", false, false)
    279 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    280 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    281 INITIALIZE_PASS_END(HexagonHardwareLoops, "hwloops",
    282                     "Hexagon Hardware Loops", false, false)
    283 
    284 
    285 /// \brief Returns true if the instruction is a hardware loop instruction.
    286 static bool isHardwareLoop(const MachineInstr *MI) {
    287   return MI->getOpcode() == Hexagon::LOOP0_r ||
    288     MI->getOpcode() == Hexagon::LOOP0_i;
    289 }
    290 
    291 FunctionPass *llvm::createHexagonHardwareLoops() {
    292   return new HexagonHardwareLoops();
    293 }
    294 
    295 
    296 bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
    297   DEBUG(dbgs() << "********* Hexagon Hardware Loops *********\n");
    298 
    299   bool Changed = false;
    300 
    301   MLI = &getAnalysis<MachineLoopInfo>();
    302   MRI = &MF.getRegInfo();
    303   MDT = &getAnalysis<MachineDominatorTree>();
    304   TM  = static_cast<const HexagonTargetMachine*>(&MF.getTarget());
    305   TII = static_cast<const HexagonInstrInfo*>(TM->getInstrInfo());
    306   TRI = static_cast<const HexagonRegisterInfo*>(TM->getRegisterInfo());
    307 
    308   for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end();
    309        I != E; ++I) {
    310     MachineLoop *L = *I;
    311     if (!L->getParentLoop())
    312       Changed |= convertToHardwareLoop(L);
    313   }
    314 
    315   return Changed;
    316 }
    317 
    318 
    319 bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
    320                                                  unsigned &Reg,
    321                                                  int64_t &IVBump,
    322                                                  MachineInstr *&IVOp
    323                                                  ) const {
    324   MachineBasicBlock *Header = L->getHeader();
    325   MachineBasicBlock *Preheader = L->getLoopPreheader();
    326   MachineBasicBlock *Latch = L->getLoopLatch();
    327   if (!Header || !Preheader || !Latch)
    328     return false;
    329 
    330   // This pair represents an induction register together with an immediate
    331   // value that will be added to it in each loop iteration.
    332   typedef std::pair<unsigned,int64_t> RegisterBump;
    333 
    334   // Mapping:  R.next -> (R, bump), where R, R.next and bump are derived
    335   // from an induction operation
    336   //   R.next = R + bump
    337   // where bump is an immediate value.
    338   typedef std::map<unsigned,RegisterBump> InductionMap;
    339 
    340   InductionMap IndMap;
    341 
    342   typedef MachineBasicBlock::instr_iterator instr_iterator;
    343   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
    344        I != E && I->isPHI(); ++I) {
    345     MachineInstr *Phi = &*I;
    346 
    347     // Have a PHI instruction.  Get the operand that corresponds to the
    348     // latch block, and see if is a result of an addition of form "reg+imm",
    349     // where the "reg" is defined by the PHI node we are looking at.
    350     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
    351       if (Phi->getOperand(i+1).getMBB() != Latch)
    352         continue;
    353 
    354       unsigned PhiOpReg = Phi->getOperand(i).getReg();
    355       MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
    356       unsigned UpdOpc = DI->getOpcode();
    357       bool isAdd = (UpdOpc == Hexagon::ADD_ri);
    358 
    359       if (isAdd) {
    360         // If the register operand to the add is the PHI we're
    361         // looking at, this meets the induction pattern.
    362         unsigned IndReg = DI->getOperand(1).getReg();
    363         if (MRI->getVRegDef(IndReg) == Phi) {
    364           unsigned UpdReg = DI->getOperand(0).getReg();
    365           int64_t V = DI->getOperand(2).getImm();
    366           IndMap.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
    367         }
    368       }
    369     }  // for (i)
    370   }  // for (instr)
    371 
    372   SmallVector<MachineOperand,2> Cond;
    373   MachineBasicBlock *TB = 0, *FB = 0;
    374   bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
    375   if (NotAnalyzed)
    376     return false;
    377 
    378   unsigned CSz = Cond.size();
    379   assert (CSz == 1 || CSz == 2);
    380   unsigned PredR = Cond[CSz-1].getReg();
    381 
    382   MachineInstr *PredI = MRI->getVRegDef(PredR);
    383   if (!PredI->isCompare())
    384     return false;
    385 
    386   unsigned CmpReg1 = 0, CmpReg2 = 0;
    387   int CmpImm = 0, CmpMask = 0;
    388   bool CmpAnalyzed = TII->analyzeCompare(PredI, CmpReg1, CmpReg2,
    389                                          CmpMask, CmpImm);
    390   // Fail if the compare was not analyzed, or it's not comparing a register
    391   // with an immediate value.  Not checking the mask here, since we handle
    392   // the individual compare opcodes (including CMPb) later on.
    393   if (!CmpAnalyzed)
    394     return false;
    395 
    396   // Exactly one of the input registers to the comparison should be among
    397   // the induction registers.
    398   InductionMap::iterator IndMapEnd = IndMap.end();
    399   InductionMap::iterator F = IndMapEnd;
    400   if (CmpReg1 != 0) {
    401     InductionMap::iterator F1 = IndMap.find(CmpReg1);
    402     if (F1 != IndMapEnd)
    403       F = F1;
    404   }
    405   if (CmpReg2 != 0) {
    406     InductionMap::iterator F2 = IndMap.find(CmpReg2);
    407     if (F2 != IndMapEnd) {
    408       if (F != IndMapEnd)
    409         return false;
    410       F = F2;
    411     }
    412   }
    413   if (F == IndMapEnd)
    414     return false;
    415 
    416   Reg = F->second.first;
    417   IVBump = F->second.second;
    418   IVOp = MRI->getVRegDef(F->first);
    419   return true;
    420 }
    421 
    422 
    423 /// \brief Analyze the statements in a loop to determine if the loop has
    424 /// a computable trip count and, if so, return a value that represents
    425 /// the trip count expression.
    426 ///
    427 /// This function iterates over the phi nodes in the loop to check for
    428 /// induction variable patterns that are used in the calculation for
    429 /// the number of time the loop is executed.
    430 CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
    431                                 SmallVector<MachineInstr*, 2> &OldInsts) {
    432   MachineBasicBlock *TopMBB = L->getTopBlock();
    433   MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin();
    434   assert(PI != TopMBB->pred_end() &&
    435          "Loop must have more than one incoming edge!");
    436   MachineBasicBlock *Backedge = *PI++;
    437   if (PI == TopMBB->pred_end())  // dead loop?
    438     return 0;
    439   MachineBasicBlock *Incoming = *PI++;
    440   if (PI != TopMBB->pred_end())  // multiple backedges?
    441     return 0;
    442 
    443   // Make sure there is one incoming and one backedge and determine which
    444   // is which.
    445   if (L->contains(Incoming)) {
    446     if (L->contains(Backedge))
    447       return 0;
    448     std::swap(Incoming, Backedge);
    449   } else if (!L->contains(Backedge))
    450     return 0;
    451 
    452   // Look for the cmp instruction to determine if we can get a useful trip
    453   // count.  The trip count can be either a register or an immediate.  The
    454   // location of the value depends upon the type (reg or imm).
    455   MachineBasicBlock *Latch = L->getLoopLatch();
    456   if (!Latch)
    457     return 0;
    458 
    459   unsigned IVReg = 0;
    460   int64_t IVBump = 0;
    461   MachineInstr *IVOp;
    462   bool FoundIV = findInductionRegister(L, IVReg, IVBump, IVOp);
    463   if (!FoundIV)
    464     return 0;
    465 
    466   MachineBasicBlock *Preheader = L->getLoopPreheader();
    467 
    468   MachineOperand *InitialValue = 0;
    469   MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
    470   for (unsigned i = 1, n = IV_Phi->getNumOperands(); i < n; i += 2) {
    471     MachineBasicBlock *MBB = IV_Phi->getOperand(i+1).getMBB();
    472     if (MBB == Preheader)
    473       InitialValue = &IV_Phi->getOperand(i);
    474     else if (MBB == Latch)
    475       IVReg = IV_Phi->getOperand(i).getReg();  // Want IV reg after bump.
    476   }
    477   if (!InitialValue)
    478     return 0;
    479 
    480   SmallVector<MachineOperand,2> Cond;
    481   MachineBasicBlock *TB = 0, *FB = 0;
    482   bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
    483   if (NotAnalyzed)
    484     return 0;
    485 
    486   MachineBasicBlock *Header = L->getHeader();
    487   // TB must be non-null.  If FB is also non-null, one of them must be
    488   // the header.  Otherwise, branch to TB could be exiting the loop, and
    489   // the fall through can go to the header.
    490   assert (TB && "Latch block without a branch?");
    491   assert ((!FB || TB == Header || FB == Header) && "Branches not to header?");
    492   if (!TB || (FB && TB != Header && FB != Header))
    493     return 0;
    494 
    495   // Branches of form "if (!P) ..." cause HexagonInstrInfo::AnalyzeBranch
    496   // to put imm(0), followed by P in the vector Cond.
    497   // If TB is not the header, it means that the "not-taken" path must lead
    498   // to the header.
    499   bool Negated = (Cond.size() > 1) ^ (TB != Header);
    500   unsigned PredReg = Cond[Cond.size()-1].getReg();
    501   MachineInstr *CondI = MRI->getVRegDef(PredReg);
    502   unsigned CondOpc = CondI->getOpcode();
    503 
    504   unsigned CmpReg1 = 0, CmpReg2 = 0;
    505   int Mask = 0, ImmValue = 0;
    506   bool AnalyzedCmp = TII->analyzeCompare(CondI, CmpReg1, CmpReg2,
    507                                          Mask, ImmValue);
    508   if (!AnalyzedCmp)
    509     return 0;
    510 
    511   // The comparison operator type determines how we compute the loop
    512   // trip count.
    513   OldInsts.push_back(CondI);
    514   OldInsts.push_back(IVOp);
    515 
    516   // Sadly, the following code gets information based on the position
    517   // of the operands in the compare instruction.  This has to be done
    518   // this way, because the comparisons check for a specific relationship
    519   // between the operands (e.g. is-less-than), rather than to find out
    520   // what relationship the operands are in (as on PPC).
    521   Comparison::Kind Cmp;
    522   bool isSwapped = false;
    523   const MachineOperand &Op1 = CondI->getOperand(1);
    524   const MachineOperand &Op2 = CondI->getOperand(2);
    525   const MachineOperand *EndValue = 0;
    526 
    527   if (Op1.isReg()) {
    528     if (Op2.isImm() || Op1.getReg() == IVReg)
    529       EndValue = &Op2;
    530     else {
    531       EndValue = &Op1;
    532       isSwapped = true;
    533     }
    534   }
    535 
    536   if (!EndValue)
    537     return 0;
    538 
    539   switch (CondOpc) {
    540     case Hexagon::CMPEQri:
    541     case Hexagon::CMPEQrr:
    542       Cmp = !Negated ? Comparison::EQ : Comparison::NE;
    543       break;
    544     case Hexagon::CMPLTrr:
    545       Cmp = !Negated ? Comparison::LTs : Comparison::GEs;
    546       break;
    547     case Hexagon::CMPLTUrr:
    548       Cmp = !Negated ? Comparison::LTu : Comparison::GEu;
    549       break;
    550     case Hexagon::CMPGTUri:
    551     case Hexagon::CMPGTUrr:
    552       Cmp = !Negated ? Comparison::GTu : Comparison::LEu;
    553       break;
    554     case Hexagon::CMPGTri:
    555     case Hexagon::CMPGTrr:
    556       Cmp = !Negated ? Comparison::GTs : Comparison::LEs;
    557       break;
    558     // Very limited support for byte/halfword compares.
    559     case Hexagon::CMPbEQri_V4:
    560     case Hexagon::CMPhEQri_V4: {
    561       if (IVBump != 1)
    562         return 0;
    563 
    564       int64_t InitV, EndV;
    565       // Since the comparisons are "ri", the EndValue should be an
    566       // immediate.  Check it just in case.
    567       assert(EndValue->isImm() && "Unrecognized latch comparison");
    568       EndV = EndValue->getImm();
    569       // Allow InitialValue to be a register defined with an immediate.
    570       if (InitialValue->isReg()) {
    571         if (!defWithImmediate(InitialValue->getReg()))
    572           return 0;
    573         InitV = getImmediate(*InitialValue);
    574       } else {
    575         assert(InitialValue->isImm());
    576         InitV = InitialValue->getImm();
    577       }
    578       if (InitV >= EndV)
    579         return 0;
    580       if (CondOpc == Hexagon::CMPbEQri_V4) {
    581         if (!isInt<8>(InitV) || !isInt<8>(EndV))
    582           return 0;
    583       } else {  // Hexagon::CMPhEQri_V4
    584         if (!isInt<16>(InitV) || !isInt<16>(EndV))
    585           return 0;
    586       }
    587       Cmp = !Negated ? Comparison::EQ : Comparison::NE;
    588       break;
    589     }
    590     default:
    591       return 0;
    592   }
    593 
    594   if (isSwapped)
    595    Cmp = Comparison::getSwappedComparison(Cmp);
    596 
    597   if (InitialValue->isReg()) {
    598     unsigned R = InitialValue->getReg();
    599     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
    600     if (!MDT->properlyDominates(DefBB, Header))
    601       return 0;
    602     OldInsts.push_back(MRI->getVRegDef(R));
    603   }
    604   if (EndValue->isReg()) {
    605     unsigned R = EndValue->getReg();
    606     MachineBasicBlock *DefBB = MRI->getVRegDef(R)->getParent();
    607     if (!MDT->properlyDominates(DefBB, Header))
    608       return 0;
    609   }
    610 
    611   return computeCount(L, InitialValue, EndValue, IVReg, IVBump, Cmp);
    612 }
    613 
    614 /// \brief Helper function that returns the expression that represents the
    615 /// number of times a loop iterates.  The function takes the operands that
    616 /// represent the loop start value, loop end value, and induction value.
    617 /// Based upon these operands, the function attempts to compute the trip count.
    618 CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
    619                                                const MachineOperand *Start,
    620                                                const MachineOperand *End,
    621                                                unsigned IVReg,
    622                                                int64_t IVBump,
    623                                                Comparison::Kind Cmp) const {
    624   // Cannot handle comparison EQ, i.e. while (A == B).
    625   if (Cmp == Comparison::EQ)
    626     return 0;
    627 
    628   // Check if either the start or end values are an assignment of an immediate.
    629   // If so, use the immediate value rather than the register.
    630   if (Start->isReg()) {
    631     const MachineInstr *StartValInstr = MRI->getVRegDef(Start->getReg());
    632     if (StartValInstr && StartValInstr->getOpcode() == Hexagon::TFRI)
    633       Start = &StartValInstr->getOperand(1);
    634   }
    635   if (End->isReg()) {
    636     const MachineInstr *EndValInstr = MRI->getVRegDef(End->getReg());
    637     if (EndValInstr && EndValInstr->getOpcode() == Hexagon::TFRI)
    638       End = &EndValInstr->getOperand(1);
    639   }
    640 
    641   assert (Start->isReg() || Start->isImm());
    642   assert (End->isReg() || End->isImm());
    643 
    644   bool CmpLess =     Cmp & Comparison::L;
    645   bool CmpGreater =  Cmp & Comparison::G;
    646   bool CmpHasEqual = Cmp & Comparison::EQ;
    647 
    648   // Avoid certain wrap-arounds.  This doesn't detect all wrap-arounds.
    649   // If loop executes while iv is "less" with the iv value going down, then
    650   // the iv must wrap.
    651   if (CmpLess && IVBump < 0)
    652     return 0;
    653   // If loop executes while iv is "greater" with the iv value going up, then
    654   // the iv must wrap.
    655   if (CmpGreater && IVBump > 0)
    656     return 0;
    657 
    658   if (Start->isImm() && End->isImm()) {
    659     // Both, start and end are immediates.
    660     int64_t StartV = Start->getImm();
    661     int64_t EndV = End->getImm();
    662     int64_t Dist = EndV - StartV;
    663     if (Dist == 0)
    664       return 0;
    665 
    666     bool Exact = (Dist % IVBump) == 0;
    667 
    668     if (Cmp == Comparison::NE) {
    669       if (!Exact)
    670         return 0;
    671       if ((Dist < 0) ^ (IVBump < 0))
    672         return 0;
    673     }
    674 
    675     // For comparisons that include the final value (i.e. include equality
    676     // with the final value), we need to increase the distance by 1.
    677     if (CmpHasEqual)
    678       Dist = Dist > 0 ? Dist+1 : Dist-1;
    679 
    680     // assert (CmpLess => Dist > 0);
    681     assert ((!CmpLess || Dist > 0) && "Loop should never iterate!");
    682     // assert (CmpGreater => Dist < 0);
    683     assert ((!CmpGreater || Dist < 0) && "Loop should never iterate!");
    684 
    685     // "Normalized" distance, i.e. with the bump set to +-1.
    686     int64_t Dist1 = (IVBump > 0) ? (Dist +  (IVBump-1)) /   IVBump
    687                                :  (-Dist + (-IVBump-1)) / (-IVBump);
    688     assert (Dist1 > 0 && "Fishy thing.  Both operands have the same sign.");
    689 
    690     uint64_t Count = Dist1;
    691 
    692     if (Count > 0xFFFFFFFFULL)
    693       return 0;
    694 
    695     return new CountValue(CountValue::CV_Immediate, Count);
    696   }
    697 
    698   // A general case: Start and End are some values, but the actual
    699   // iteration count may not be available.  If it is not, insert
    700   // a computation of it into the preheader.
    701 
    702   // If the induction variable bump is not a power of 2, quit.
    703   // Othwerise we'd need a general integer division.
    704   if (!isPowerOf2_64(abs(IVBump)))
    705     return 0;
    706 
    707   MachineBasicBlock *PH = Loop->getLoopPreheader();
    708   assert (PH && "Should have a preheader by now");
    709   MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
    710   DebugLoc DL = (InsertPos != PH->end()) ? InsertPos->getDebugLoc()
    711                                          : DebugLoc();
    712 
    713   // If Start is an immediate and End is a register, the trip count
    714   // will be "reg - imm".  Hexagon's "subtract immediate" instruction
    715   // is actually "reg + -imm".
    716 
    717   // If the loop IV is going downwards, i.e. if the bump is negative,
    718   // then the iteration count (computed as End-Start) will need to be
    719   // negated.  To avoid the negation, just swap Start and End.
    720   if (IVBump < 0) {
    721     std::swap(Start, End);
    722     IVBump = -IVBump;
    723   }
    724   // Cmp may now have a wrong direction, e.g.  LEs may now be GEs.
    725   // Signedness, and "including equality" are preserved.
    726 
    727   bool RegToImm = Start->isReg() && End->isImm(); // for (reg..imm)
    728   bool RegToReg = Start->isReg() && End->isReg(); // for (reg..reg)
    729 
    730   int64_t StartV = 0, EndV = 0;
    731   if (Start->isImm())
    732     StartV = Start->getImm();
    733   if (End->isImm())
    734     EndV = End->getImm();
    735 
    736   int64_t AdjV = 0;
    737   // To compute the iteration count, we would need this computation:
    738   //   Count = (End - Start + (IVBump-1)) / IVBump
    739   // or, when CmpHasEqual:
    740   //   Count = (End - Start + (IVBump-1)+1) / IVBump
    741   // The "IVBump-1" part is the adjustment (AdjV).  We can avoid
    742   // generating an instruction specifically to add it if we can adjust
    743   // the immediate values for Start or End.
    744 
    745   if (CmpHasEqual) {
    746     // Need to add 1 to the total iteration count.
    747     if (Start->isImm())
    748       StartV--;
    749     else if (End->isImm())
    750       EndV++;
    751     else
    752       AdjV += 1;
    753   }
    754 
    755   if (Cmp != Comparison::NE) {
    756     if (Start->isImm())
    757       StartV -= (IVBump-1);
    758     else if (End->isImm())
    759       EndV += (IVBump-1);
    760     else
    761       AdjV += (IVBump-1);
    762   }
    763 
    764   unsigned R = 0, SR = 0;
    765   if (Start->isReg()) {
    766     R = Start->getReg();
    767     SR = Start->getSubReg();
    768   } else {
    769     R = End->getReg();
    770     SR = End->getSubReg();
    771   }
    772   const TargetRegisterClass *RC = MRI->getRegClass(R);
    773   // Hardware loops cannot handle 64-bit registers.  If it's a double
    774   // register, it has to have a subregister.
    775   if (!SR && RC == &Hexagon::DoubleRegsRegClass)
    776     return 0;
    777   const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
    778 
    779   // Compute DistR (register with the distance between Start and End).
    780   unsigned DistR, DistSR;
    781 
    782   // Avoid special case, where the start value is an imm(0).
    783   if (Start->isImm() && StartV == 0) {
    784     DistR = End->getReg();
    785     DistSR = End->getSubReg();
    786   } else {
    787     const MCInstrDesc &SubD = RegToReg ? TII->get(Hexagon::SUB_rr) :
    788                               (RegToImm ? TII->get(Hexagon::SUB_ri) :
    789                                           TII->get(Hexagon::ADD_ri));
    790     unsigned SubR = MRI->createVirtualRegister(IntRC);
    791     MachineInstrBuilder SubIB =
    792       BuildMI(*PH, InsertPos, DL, SubD, SubR);
    793 
    794     if (RegToReg) {
    795       SubIB.addReg(End->getReg(), 0, End->getSubReg())
    796            .addReg(Start->getReg(), 0, Start->getSubReg());
    797     } else if (RegToImm) {
    798       SubIB.addImm(EndV)
    799            .addReg(Start->getReg(), 0, Start->getSubReg());
    800     } else { // ImmToReg
    801       SubIB.addReg(End->getReg(), 0, End->getSubReg())
    802            .addImm(-StartV);
    803     }
    804     DistR = SubR;
    805     DistSR = 0;
    806   }
    807 
    808   // From DistR, compute AdjR (register with the adjusted distance).
    809   unsigned AdjR, AdjSR;
    810 
    811   if (AdjV == 0) {
    812     AdjR = DistR;
    813     AdjSR = DistSR;
    814   } else {
    815     // Generate CountR = ADD DistR, AdjVal
    816     unsigned AddR = MRI->createVirtualRegister(IntRC);
    817     const MCInstrDesc &AddD = TII->get(Hexagon::ADD_ri);
    818     BuildMI(*PH, InsertPos, DL, AddD, AddR)
    819       .addReg(DistR, 0, DistSR)
    820       .addImm(AdjV);
    821 
    822     AdjR = AddR;
    823     AdjSR = 0;
    824   }
    825 
    826   // From AdjR, compute CountR (register with the final count).
    827   unsigned CountR, CountSR;
    828 
    829   if (IVBump == 1) {
    830     CountR = AdjR;
    831     CountSR = AdjSR;
    832   } else {
    833     // The IV bump is a power of two. Log_2(IV bump) is the shift amount.
    834     unsigned Shift = Log2_32(IVBump);
    835 
    836     // Generate NormR = LSR DistR, Shift.
    837     unsigned LsrR = MRI->createVirtualRegister(IntRC);
    838     const MCInstrDesc &LsrD = TII->get(Hexagon::LSR_ri);
    839     BuildMI(*PH, InsertPos, DL, LsrD, LsrR)
    840       .addReg(AdjR, 0, AdjSR)
    841       .addImm(Shift);
    842 
    843     CountR = LsrR;
    844     CountSR = 0;
    845   }
    846 
    847   return new CountValue(CountValue::CV_Register, CountR, CountSR);
    848 }
    849 
    850 
    851 /// \brief Return true if the operation is invalid within hardware loop.
    852 bool HexagonHardwareLoops::isInvalidLoopOperation(
    853       const MachineInstr *MI) const {
    854 
    855   // call is not allowed because the callee may use a hardware loop
    856   if (MI->getDesc().isCall())
    857     return true;
    858 
    859   // do not allow nested hardware loops
    860   if (isHardwareLoop(MI))
    861     return true;
    862 
    863   // check if the instruction defines a hardware loop register
    864   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    865     const MachineOperand &MO = MI->getOperand(i);
    866     if (!MO.isReg() || !MO.isDef())
    867       continue;
    868     unsigned R = MO.getReg();
    869     if (R == Hexagon::LC0 || R == Hexagon::LC1 ||
    870         R == Hexagon::SA0 || R == Hexagon::SA1)
    871       return true;
    872   }
    873   return false;
    874 }
    875 
    876 
    877 /// \brief - Return true if the loop contains an instruction that inhibits
    878 /// the use of the hardware loop function.
    879 bool HexagonHardwareLoops::containsInvalidInstruction(MachineLoop *L) const {
    880   const std::vector<MachineBasicBlock*> Blocks = L->getBlocks();
    881   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    882     MachineBasicBlock *MBB = Blocks[i];
    883     for (MachineBasicBlock::iterator
    884            MII = MBB->begin(), E = MBB->end(); MII != E; ++MII) {
    885       const MachineInstr *MI = &*MII;
    886       if (isInvalidLoopOperation(MI))
    887         return true;
    888     }
    889   }
    890   return false;
    891 }
    892 
    893 
    894 /// \brief Returns true if the instruction is dead.  This was essentially
    895 /// copied from DeadMachineInstructionElim::isDead, but with special cases
    896 /// for inline asm, physical registers and instructions with side effects
    897 /// removed.
    898 bool HexagonHardwareLoops::isDead(const MachineInstr *MI,
    899                              SmallVector<MachineInstr*, 1> &DeadPhis) const {
    900   // Examine each operand.
    901   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    902     const MachineOperand &MO = MI->getOperand(i);
    903     if (!MO.isReg() || !MO.isDef())
    904       continue;
    905 
    906     unsigned Reg = MO.getReg();
    907     if (MRI->use_nodbg_empty(Reg))
    908       continue;
    909 
    910     typedef MachineRegisterInfo::use_nodbg_iterator use_nodbg_iterator;
    911 
    912     // This instruction has users, but if the only user is the phi node for the
    913     // parent block, and the only use of that phi node is this instruction, then
    914     // this instruction is dead: both it (and the phi node) can be removed.
    915     use_nodbg_iterator I = MRI->use_nodbg_begin(Reg);
    916     use_nodbg_iterator End = MRI->use_nodbg_end();
    917     if (llvm::next(I) != End || !I.getOperand().getParent()->isPHI())
    918       return false;
    919 
    920     MachineInstr *OnePhi = I.getOperand().getParent();
    921     for (unsigned j = 0, f = OnePhi->getNumOperands(); j != f; ++j) {
    922       const MachineOperand &OPO = OnePhi->getOperand(j);
    923       if (!OPO.isReg() || !OPO.isDef())
    924         continue;
    925 
    926       unsigned OPReg = OPO.getReg();
    927       use_nodbg_iterator nextJ;
    928       for (use_nodbg_iterator J = MRI->use_nodbg_begin(OPReg);
    929            J != End; J = nextJ) {
    930         nextJ = llvm::next(J);
    931         MachineOperand &Use = J.getOperand();
    932         MachineInstr *UseMI = Use.getParent();
    933 
    934         // If the phi node has a user that is not MI, bail...
    935         if (MI != UseMI)
    936           return false;
    937       }
    938     }
    939     DeadPhis.push_back(OnePhi);
    940   }
    941 
    942   // If there are no defs with uses, the instruction is dead.
    943   return true;
    944 }
    945 
    946 void HexagonHardwareLoops::removeIfDead(MachineInstr *MI) {
    947   // This procedure was essentially copied from DeadMachineInstructionElim.
    948 
    949   SmallVector<MachineInstr*, 1> DeadPhis;
    950   if (isDead(MI, DeadPhis)) {
    951     DEBUG(dbgs() << "HW looping will remove: " << *MI);
    952 
    953     // It is possible that some DBG_VALUE instructions refer to this
    954     // instruction.  Examine each def operand for such references;
    955     // if found, mark the DBG_VALUE as undef (but don't delete it).
    956     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    957       const MachineOperand &MO = MI->getOperand(i);
    958       if (!MO.isReg() || !MO.isDef())
    959         continue;
    960       unsigned Reg = MO.getReg();
    961       MachineRegisterInfo::use_iterator nextI;
    962       for (MachineRegisterInfo::use_iterator I = MRI->use_begin(Reg),
    963            E = MRI->use_end(); I != E; I = nextI) {
    964         nextI = llvm::next(I);  // I is invalidated by the setReg
    965         MachineOperand &Use = I.getOperand();
    966         MachineInstr *UseMI = Use.getParent();
    967         if (UseMI == MI)
    968           continue;
    969         if (Use.isDebug())
    970           UseMI->getOperand(0).setReg(0U);
    971         // This may also be a "instr -> phi -> instr" case which can
    972         // be removed too.
    973       }
    974     }
    975 
    976     MI->eraseFromParent();
    977     for (unsigned i = 0; i < DeadPhis.size(); ++i)
    978       DeadPhis[i]->eraseFromParent();
    979   }
    980 }
    981 
    982 /// \brief Check if the loop is a candidate for converting to a hardware
    983 /// loop.  If so, then perform the transformation.
    984 ///
    985 /// This function works on innermost loops first.  A loop can be converted
    986 /// if it is a counting loop; either a register value or an immediate.
    987 ///
    988 /// The code makes several assumptions about the representation of the loop
    989 /// in llvm.
    990 bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L) {
    991   // This is just for sanity.
    992   assert(L->getHeader() && "Loop without a header?");
    993 
    994   bool Changed = false;
    995   // Process nested loops first.
    996   for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I)
    997     Changed |= convertToHardwareLoop(*I);
    998 
    999   // If a nested loop has been converted, then we can't convert this loop.
   1000   if (Changed)
   1001     return Changed;
   1002 
   1003 #ifndef NDEBUG
   1004   // Stop trying after reaching the limit (if any).
   1005   int Limit = HWLoopLimit;
   1006   if (Limit >= 0) {
   1007     if (Counter >= HWLoopLimit)
   1008       return false;
   1009     Counter++;
   1010   }
   1011 #endif
   1012 
   1013   // Does the loop contain any invalid instructions?
   1014   if (containsInvalidInstruction(L))
   1015     return false;
   1016 
   1017   // Is the induction variable bump feeding the latch condition?
   1018   if (!fixupInductionVariable(L))
   1019     return false;
   1020 
   1021   MachineBasicBlock *LastMBB = L->getExitingBlock();
   1022   // Don't generate hw loop if the loop has more than one exit.
   1023   if (LastMBB == 0)
   1024     return false;
   1025 
   1026   MachineBasicBlock::iterator LastI = LastMBB->getFirstTerminator();
   1027   if (LastI == LastMBB->end())
   1028     return false;
   1029 
   1030   // Ensure the loop has a preheader: the loop instruction will be
   1031   // placed there.
   1032   bool NewPreheader = false;
   1033   MachineBasicBlock *Preheader = L->getLoopPreheader();
   1034   if (!Preheader) {
   1035     Preheader = createPreheaderForLoop(L);
   1036     if (!Preheader)
   1037       return false;
   1038     NewPreheader = true;
   1039   }
   1040   MachineBasicBlock::iterator InsertPos = Preheader->getFirstTerminator();
   1041 
   1042   SmallVector<MachineInstr*, 2> OldInsts;
   1043   // Are we able to determine the trip count for the loop?
   1044   CountValue *TripCount = getLoopTripCount(L, OldInsts);
   1045   if (TripCount == 0)
   1046     return false;
   1047 
   1048   // Is the trip count available in the preheader?
   1049   if (TripCount->isReg()) {
   1050     // There will be a use of the register inserted into the preheader,
   1051     // so make sure that the register is actually defined at that point.
   1052     MachineInstr *TCDef = MRI->getVRegDef(TripCount->getReg());
   1053     MachineBasicBlock *BBDef = TCDef->getParent();
   1054     if (!NewPreheader) {
   1055       if (!MDT->dominates(BBDef, Preheader))
   1056         return false;
   1057     } else {
   1058       // If we have just created a preheader, the dominator tree won't be
   1059       // aware of it.  Check if the definition of the register dominates
   1060       // the header, but is not the header itself.
   1061       if (!MDT->properlyDominates(BBDef, L->getHeader()))
   1062         return false;
   1063     }
   1064   }
   1065 
   1066   // Determine the loop start.
   1067   MachineBasicBlock *LoopStart = L->getTopBlock();
   1068   if (L->getLoopLatch() != LastMBB) {
   1069     // When the exit and latch are not the same, use the latch block as the
   1070     // start.
   1071     // The loop start address is used only after the 1st iteration, and the
   1072     // loop latch may contains instrs. that need to be executed after the
   1073     // first iteration.
   1074     LoopStart = L->getLoopLatch();
   1075     // Make sure the latch is a successor of the exit, otherwise it won't work.
   1076     if (!LastMBB->isSuccessor(LoopStart))
   1077       return false;
   1078   }
   1079 
   1080   // Convert the loop to a hardware loop.
   1081   DEBUG(dbgs() << "Change to hardware loop at "; L->dump());
   1082   DebugLoc DL;
   1083   if (InsertPos != Preheader->end())
   1084     DL = InsertPos->getDebugLoc();
   1085 
   1086   if (TripCount->isReg()) {
   1087     // Create a copy of the loop count register.
   1088     unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
   1089     BuildMI(*Preheader, InsertPos, DL, TII->get(TargetOpcode::COPY), CountReg)
   1090       .addReg(TripCount->getReg(), 0, TripCount->getSubReg());
   1091     // Add the Loop instruction to the beginning of the loop.
   1092     BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
   1093       .addMBB(LoopStart)
   1094       .addReg(CountReg);
   1095   } else {
   1096     assert(TripCount->isImm() && "Expecting immediate value for trip count");
   1097     // Add the Loop immediate instruction to the beginning of the loop,
   1098     // if the immediate fits in the instructions.  Otherwise, we need to
   1099     // create a new virtual register.
   1100     int64_t CountImm = TripCount->getImm();
   1101     if (!TII->isValidOffset(Hexagon::LOOP0_i, CountImm)) {
   1102       unsigned CountReg = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
   1103       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::TFRI), CountReg)
   1104         .addImm(CountImm);
   1105       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_r))
   1106         .addMBB(LoopStart).addReg(CountReg);
   1107     } else
   1108       BuildMI(*Preheader, InsertPos, DL, TII->get(Hexagon::LOOP0_i))
   1109         .addMBB(LoopStart).addImm(CountImm);
   1110   }
   1111 
   1112   // Make sure the loop start always has a reference in the CFG.  We need
   1113   // to create a BlockAddress operand to get this mechanism to work both the
   1114   // MachineBasicBlock and BasicBlock objects need the flag set.
   1115   LoopStart->setHasAddressTaken();
   1116   // This line is needed to set the hasAddressTaken flag on the BasicBlock
   1117   // object.
   1118   BlockAddress::get(const_cast<BasicBlock *>(LoopStart->getBasicBlock()));
   1119 
   1120   // Replace the loop branch with an endloop instruction.
   1121   DebugLoc LastIDL = LastI->getDebugLoc();
   1122   BuildMI(*LastMBB, LastI, LastIDL,
   1123           TII->get(Hexagon::ENDLOOP0)).addMBB(LoopStart);
   1124 
   1125   // The loop ends with either:
   1126   //  - a conditional branch followed by an unconditional branch, or
   1127   //  - a conditional branch to the loop start.
   1128   if (LastI->getOpcode() == Hexagon::JMP_c ||
   1129       LastI->getOpcode() == Hexagon::JMP_cNot) {
   1130     // Delete one and change/add an uncond. branch to out of the loop.
   1131     MachineBasicBlock *BranchTarget = LastI->getOperand(1).getMBB();
   1132     LastI = LastMBB->erase(LastI);
   1133     if (!L->contains(BranchTarget)) {
   1134       if (LastI != LastMBB->end())
   1135         LastI = LastMBB->erase(LastI);
   1136       SmallVector<MachineOperand, 0> Cond;
   1137       TII->InsertBranch(*LastMBB, BranchTarget, 0, Cond, LastIDL);
   1138     }
   1139   } else {
   1140     // Conditional branch to loop start; just delete it.
   1141     LastMBB->erase(LastI);
   1142   }
   1143   delete TripCount;
   1144 
   1145   // The induction operation and the comparison may now be
   1146   // unneeded. If these are unneeded, then remove them.
   1147   for (unsigned i = 0; i < OldInsts.size(); ++i)
   1148     removeIfDead(OldInsts[i]);
   1149 
   1150   ++NumHWLoops;
   1151   return true;
   1152 }
   1153 
   1154 
   1155 bool HexagonHardwareLoops::orderBumpCompare(MachineInstr *BumpI,
   1156                                             MachineInstr *CmpI) {
   1157   assert (BumpI != CmpI && "Bump and compare in the same instruction?");
   1158 
   1159   MachineBasicBlock *BB = BumpI->getParent();
   1160   if (CmpI->getParent() != BB)
   1161     return false;
   1162 
   1163   typedef MachineBasicBlock::instr_iterator instr_iterator;
   1164   // Check if things are in order to begin with.
   1165   for (instr_iterator I = BumpI, E = BB->instr_end(); I != E; ++I)
   1166     if (&*I == CmpI)
   1167       return true;
   1168 
   1169   // Out of order.
   1170   unsigned PredR = CmpI->getOperand(0).getReg();
   1171   bool FoundBump = false;
   1172   instr_iterator CmpIt = CmpI, NextIt = llvm::next(CmpIt);
   1173   for (instr_iterator I = NextIt, E = BB->instr_end(); I != E; ++I) {
   1174     MachineInstr *In = &*I;
   1175     for (unsigned i = 0, n = In->getNumOperands(); i < n; ++i) {
   1176       MachineOperand &MO = In->getOperand(i);
   1177       if (MO.isReg() && MO.isUse()) {
   1178         if (MO.getReg() == PredR)  // Found an intervening use of PredR.
   1179           return false;
   1180       }
   1181     }
   1182 
   1183     if (In == BumpI) {
   1184       instr_iterator After = BumpI;
   1185       instr_iterator From = CmpI;
   1186       BB->splice(llvm::next(After), BB, From);
   1187       FoundBump = true;
   1188       break;
   1189     }
   1190   }
   1191   assert (FoundBump && "Cannot determine instruction order");
   1192   return FoundBump;
   1193 }
   1194 
   1195 
   1196 MachineInstr *HexagonHardwareLoops::defWithImmediate(unsigned R) {
   1197   MachineInstr *DI = MRI->getVRegDef(R);
   1198   unsigned DOpc = DI->getOpcode();
   1199   switch (DOpc) {
   1200     case Hexagon::TFRI:
   1201     case Hexagon::TFRI64:
   1202     case Hexagon::CONST32_Int_Real:
   1203     case Hexagon::CONST64_Int_Real:
   1204       return DI;
   1205   }
   1206   return 0;
   1207 }
   1208 
   1209 
   1210 int64_t HexagonHardwareLoops::getImmediate(MachineOperand &MO) {
   1211   if (MO.isImm())
   1212     return MO.getImm();
   1213   assert(MO.isReg());
   1214   unsigned R = MO.getReg();
   1215   MachineInstr *DI = defWithImmediate(R);
   1216   assert(DI && "Need an immediate operand");
   1217   // All currently supported "define-with-immediate" instructions have the
   1218   // actual immediate value in the operand(1).
   1219   int64_t v = DI->getOperand(1).getImm();
   1220   return v;
   1221 }
   1222 
   1223 
   1224 void HexagonHardwareLoops::setImmediate(MachineOperand &MO, int64_t Val) {
   1225   if (MO.isImm()) {
   1226     MO.setImm(Val);
   1227     return;
   1228   }
   1229 
   1230   assert(MO.isReg());
   1231   unsigned R = MO.getReg();
   1232   MachineInstr *DI = defWithImmediate(R);
   1233   if (MRI->hasOneNonDBGUse(R)) {
   1234     // If R has only one use, then just change its defining instruction to
   1235     // the new immediate value.
   1236     DI->getOperand(1).setImm(Val);
   1237     return;
   1238   }
   1239 
   1240   const TargetRegisterClass *RC = MRI->getRegClass(R);
   1241   unsigned NewR = MRI->createVirtualRegister(RC);
   1242   MachineBasicBlock &B = *DI->getParent();
   1243   DebugLoc DL = DI->getDebugLoc();
   1244   BuildMI(B, DI, DL, TII->get(DI->getOpcode()), NewR)
   1245     .addImm(Val);
   1246   MO.setReg(NewR);
   1247 }
   1248 
   1249 
   1250 bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
   1251   MachineBasicBlock *Header = L->getHeader();
   1252   MachineBasicBlock *Preheader = L->getLoopPreheader();
   1253   MachineBasicBlock *Latch = L->getLoopLatch();
   1254 
   1255   if (!Header || !Preheader || !Latch)
   1256     return false;
   1257 
   1258   // These data structures follow the same concept as the corresponding
   1259   // ones in findInductionRegister (where some comments are).
   1260   typedef std::pair<unsigned,int64_t> RegisterBump;
   1261   typedef std::pair<unsigned,RegisterBump> RegisterInduction;
   1262   typedef std::set<RegisterInduction> RegisterInductionSet;
   1263 
   1264   // Register candidates for induction variables, with their associated bumps.
   1265   RegisterInductionSet IndRegs;
   1266 
   1267   // Look for induction patterns:
   1268   //   vreg1 = PHI ..., [ latch, vreg2 ]
   1269   //   vreg2 = ADD vreg1, imm
   1270   typedef MachineBasicBlock::instr_iterator instr_iterator;
   1271   for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
   1272        I != E && I->isPHI(); ++I) {
   1273     MachineInstr *Phi = &*I;
   1274 
   1275     // Have a PHI instruction.
   1276     for (unsigned i = 1, n = Phi->getNumOperands(); i < n; i += 2) {
   1277       if (Phi->getOperand(i+1).getMBB() != Latch)
   1278         continue;
   1279 
   1280       unsigned PhiReg = Phi->getOperand(i).getReg();
   1281       MachineInstr *DI = MRI->getVRegDef(PhiReg);
   1282       unsigned UpdOpc = DI->getOpcode();
   1283       bool isAdd = (UpdOpc == Hexagon::ADD_ri);
   1284 
   1285       if (isAdd) {
   1286         // If the register operand to the add/sub is the PHI we are looking
   1287         // at, this meets the induction pattern.
   1288         unsigned IndReg = DI->getOperand(1).getReg();
   1289         if (MRI->getVRegDef(IndReg) == Phi) {
   1290           unsigned UpdReg = DI->getOperand(0).getReg();
   1291           int64_t V = DI->getOperand(2).getImm();
   1292           IndRegs.insert(std::make_pair(UpdReg, std::make_pair(IndReg, V)));
   1293         }
   1294       }
   1295     }  // for (i)
   1296   }  // for (instr)
   1297 
   1298   if (IndRegs.empty())
   1299     return false;
   1300 
   1301   MachineBasicBlock *TB = 0, *FB = 0;
   1302   SmallVector<MachineOperand,2> Cond;
   1303   // AnalyzeBranch returns true if it fails to analyze branch.
   1304   bool NotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Cond, false);
   1305   if (NotAnalyzed)
   1306     return false;
   1307 
   1308   // Check if the latch branch is unconditional.
   1309   if (Cond.empty())
   1310     return false;
   1311 
   1312   if (TB != Header && FB != Header)
   1313     // The latch does not go back to the header.  Not a latch we know and love.
   1314     return false;
   1315 
   1316   // Expecting a predicate register as a condition.  It won't be a hardware
   1317   // predicate register at this point yet, just a vreg.
   1318   // HexagonInstrInfo::AnalyzeBranch for negated branches inserts imm(0)
   1319   // into Cond, followed by the predicate register.  For non-negated branches
   1320   // it's just the register.
   1321   unsigned CSz = Cond.size();
   1322   if (CSz != 1 && CSz != 2)
   1323     return false;
   1324 
   1325   unsigned P = Cond[CSz-1].getReg();
   1326   MachineInstr *PredDef = MRI->getVRegDef(P);
   1327 
   1328   if (!PredDef->isCompare())
   1329     return false;
   1330 
   1331   SmallSet<unsigned,2> CmpRegs;
   1332   MachineOperand *CmpImmOp = 0;
   1333 
   1334   // Go over all operands to the compare and look for immediate and register
   1335   // operands.  Assume that if the compare has a single register use and a
   1336   // single immediate operand, then the register is being compared with the
   1337   // immediate value.
   1338   for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
   1339     MachineOperand &MO = PredDef->getOperand(i);
   1340     if (MO.isReg()) {
   1341       // Skip all implicit references.  In one case there was:
   1342       //   %vreg140<def> = FCMPUGT32_rr %vreg138, %vreg139, %USR<imp-use>
   1343       if (MO.isImplicit())
   1344         continue;
   1345       if (MO.isUse()) {
   1346         unsigned R = MO.getReg();
   1347         if (!defWithImmediate(R)) {
   1348           CmpRegs.insert(MO.getReg());
   1349           continue;
   1350         }
   1351         // Consider the register to be the "immediate" operand.
   1352         if (CmpImmOp)
   1353           return false;
   1354         CmpImmOp = &MO;
   1355       }
   1356     } else if (MO.isImm()) {
   1357       if (CmpImmOp)    // A second immediate argument?  Confusing.  Bail out.
   1358         return false;
   1359       CmpImmOp = &MO;
   1360     }
   1361   }
   1362 
   1363   if (CmpRegs.empty())
   1364     return false;
   1365 
   1366   // Check if the compared register follows the order we want.  Fix if needed.
   1367   for (RegisterInductionSet::iterator I = IndRegs.begin(), E = IndRegs.end();
   1368        I != E; ++I) {
   1369     // This is a success.  If the register used in the comparison is one that
   1370     // we have identified as a bumped (updated) induction register, there is
   1371     // nothing to do.
   1372     if (CmpRegs.count(I->first))
   1373       return true;
   1374 
   1375     // Otherwise, if the register being compared comes out of a PHI node,
   1376     // and has been recognized as following the induction pattern, and is
   1377     // compared against an immediate, we can fix it.
   1378     const RegisterBump &RB = I->second;
   1379     if (CmpRegs.count(RB.first)) {
   1380       if (!CmpImmOp)
   1381         return false;
   1382 
   1383       int64_t CmpImm = getImmediate(*CmpImmOp);
   1384       int64_t V = RB.second;
   1385       if (V > 0 && CmpImm+V < CmpImm)  // Overflow (64-bit).
   1386         return false;
   1387       if (V < 0 && CmpImm+V > CmpImm)  // Overflow (64-bit).
   1388         return false;
   1389       CmpImm += V;
   1390       // Some forms of cmp-immediate allow u9 and s10.  Assume the worst case
   1391       // scenario, i.e. an 8-bit value.
   1392       if (CmpImmOp->isImm() && !isInt<8>(CmpImm))
   1393         return false;
   1394 
   1395       // Make sure that the compare happens after the bump.  Otherwise,
   1396       // after the fixup, the compare would use a yet-undefined register.
   1397       MachineInstr *BumpI = MRI->getVRegDef(I->first);
   1398       bool Order = orderBumpCompare(BumpI, PredDef);
   1399       if (!Order)
   1400         return false;
   1401 
   1402       // Finally, fix the compare instruction.
   1403       setImmediate(*CmpImmOp, CmpImm);
   1404       for (unsigned i = 0, n = PredDef->getNumOperands(); i < n; ++i) {
   1405         MachineOperand &MO = PredDef->getOperand(i);
   1406         if (MO.isReg() && MO.getReg() == RB.first) {
   1407           MO.setReg(I->first);
   1408           return true;
   1409         }
   1410       }
   1411     }
   1412   }
   1413 
   1414   return false;
   1415 }
   1416 
   1417 
   1418 /// \brief Create a preheader for a given loop.
   1419 MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
   1420       MachineLoop *L) {
   1421   if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
   1422     return TmpPH;
   1423 
   1424   MachineBasicBlock *Header = L->getHeader();
   1425   MachineBasicBlock *Latch = L->getLoopLatch();
   1426   MachineFunction *MF = Header->getParent();
   1427   DebugLoc DL;
   1428 
   1429   if (!Latch || Header->hasAddressTaken())
   1430     return 0;
   1431 
   1432   typedef MachineBasicBlock::instr_iterator instr_iterator;
   1433   typedef MachineBasicBlock::pred_iterator pred_iterator;
   1434 
   1435   // Verify that all existing predecessors have analyzable branches
   1436   // (or no branches at all).
   1437   typedef std::vector<MachineBasicBlock*> MBBVector;
   1438   MBBVector Preds(Header->pred_begin(), Header->pred_end());
   1439   SmallVector<MachineOperand,2> Tmp1;
   1440   MachineBasicBlock *TB = 0, *FB = 0;
   1441 
   1442   if (TII->AnalyzeBranch(*Latch, TB, FB, Tmp1, false))
   1443     return 0;
   1444 
   1445   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
   1446     MachineBasicBlock *PB = *I;
   1447     if (PB != Latch) {
   1448       bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp1, false);
   1449       if (NotAnalyzed)
   1450         return 0;
   1451     }
   1452   }
   1453 
   1454   MachineBasicBlock *NewPH = MF->CreateMachineBasicBlock();
   1455   MF->insert(Header, NewPH);
   1456 
   1457   if (Header->pred_size() > 2) {
   1458     // Ensure that the header has only two predecessors: the preheader and
   1459     // the loop latch.  Any additional predecessors of the header should
   1460     // join at the newly created preheader.  Inspect all PHI nodes from the
   1461     // header and create appropriate corresponding PHI nodes in the preheader.
   1462 
   1463     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
   1464          I != E && I->isPHI(); ++I) {
   1465       MachineInstr *PN = &*I;
   1466 
   1467       const MCInstrDesc &PD = TII->get(TargetOpcode::PHI);
   1468       MachineInstr *NewPN = MF->CreateMachineInstr(PD, DL);
   1469       NewPH->insert(NewPH->end(), NewPN);
   1470 
   1471       unsigned PR = PN->getOperand(0).getReg();
   1472       const TargetRegisterClass *RC = MRI->getRegClass(PR);
   1473       unsigned NewPR = MRI->createVirtualRegister(RC);
   1474       NewPN->addOperand(MachineOperand::CreateReg(NewPR, true));
   1475 
   1476       // Copy all non-latch operands of a header's PHI node to the newly
   1477       // created PHI node in the preheader.
   1478       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
   1479         unsigned PredR = PN->getOperand(i).getReg();
   1480         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
   1481         if (PredB == Latch)
   1482           continue;
   1483 
   1484         NewPN->addOperand(MachineOperand::CreateReg(PredR, false));
   1485         NewPN->addOperand(MachineOperand::CreateMBB(PredB));
   1486       }
   1487 
   1488       // Remove copied operands from the old PHI node and add the value
   1489       // coming from the preheader's PHI.
   1490       for (int i = PN->getNumOperands()-2; i > 0; i -= 2) {
   1491         MachineBasicBlock *PredB = PN->getOperand(i+1).getMBB();
   1492         if (PredB != Latch) {
   1493           PN->RemoveOperand(i+1);
   1494           PN->RemoveOperand(i);
   1495         }
   1496       }
   1497       PN->addOperand(MachineOperand::CreateReg(NewPR, false));
   1498       PN->addOperand(MachineOperand::CreateMBB(NewPH));
   1499     }
   1500 
   1501   } else {
   1502     assert(Header->pred_size() == 2);
   1503 
   1504     // The header has only two predecessors, but the non-latch predecessor
   1505     // is not a preheader (e.g. it has other successors, etc.)
   1506     // In such a case we don't need any extra PHI nodes in the new preheader,
   1507     // all we need is to adjust existing PHIs in the header to now refer to
   1508     // the new preheader.
   1509     for (instr_iterator I = Header->instr_begin(), E = Header->instr_end();
   1510          I != E && I->isPHI(); ++I) {
   1511       MachineInstr *PN = &*I;
   1512       for (unsigned i = 1, n = PN->getNumOperands(); i < n; i += 2) {
   1513         MachineOperand &MO = PN->getOperand(i+1);
   1514         if (MO.getMBB() != Latch)
   1515           MO.setMBB(NewPH);
   1516       }
   1517     }
   1518   }
   1519 
   1520   // "Reroute" the CFG edges to link in the new preheader.
   1521   // If any of the predecessors falls through to the header, insert a branch
   1522   // to the new preheader in that place.
   1523   SmallVector<MachineOperand,1> Tmp2;
   1524   SmallVector<MachineOperand,1> EmptyCond;
   1525 
   1526   TB = FB = 0;
   1527 
   1528   for (MBBVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) {
   1529     MachineBasicBlock *PB = *I;
   1530     if (PB != Latch) {
   1531       Tmp2.clear();
   1532       bool NotAnalyzed = TII->AnalyzeBranch(*PB, TB, FB, Tmp2, false);
   1533       (void)NotAnalyzed; // supress compiler warning
   1534       assert (!NotAnalyzed && "Should be analyzable!");
   1535       if (TB != Header && (Tmp2.empty() || FB != Header))
   1536         TII->InsertBranch(*PB, NewPH, 0, EmptyCond, DL);
   1537       PB->ReplaceUsesOfBlockWith(Header, NewPH);
   1538     }
   1539   }
   1540 
   1541   // It can happen that the latch block will fall through into the header.
   1542   // Insert an unconditional branch to the header.
   1543   TB = FB = 0;
   1544   bool LatchNotAnalyzed = TII->AnalyzeBranch(*Latch, TB, FB, Tmp2, false);
   1545   (void)LatchNotAnalyzed; // supress compiler warning
   1546   assert (!LatchNotAnalyzed && "Should be analyzable!");
   1547   if (!TB && !FB)
   1548     TII->InsertBranch(*Latch, Header, 0, EmptyCond, DL);
   1549 
   1550   // Finally, the branch from the preheader to the header.
   1551   TII->InsertBranch(*NewPH, Header, 0, EmptyCond, DL);
   1552   NewPH->addSuccessor(Header);
   1553 
   1554   return NewPH;
   1555 }
   1556