Home | History | Annotate | Download | only in Hexagon
      1 //===--- HexagonBitSimplify.cpp -------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #define DEBUG_TYPE "hexbit"
     11 
     12 #include "HexagonBitTracker.h"
     13 #include "HexagonTargetMachine.h"
     14 #include "llvm/CodeGen/MachineDominators.h"
     15 #include "llvm/CodeGen/MachineFunctionPass.h"
     16 #include "llvm/CodeGen/MachineInstrBuilder.h"
     17 #include "llvm/CodeGen/MachineRegisterInfo.h"
     18 #include "llvm/CodeGen/Passes.h"
     19 #include "llvm/Support/Debug.h"
     20 #include "llvm/Support/raw_ostream.h"
     21 #include "llvm/Target/TargetInstrInfo.h"
     22 #include "llvm/Target/TargetMachine.h"
     23 
     24 using namespace llvm;
     25 
     26 namespace llvm {
     27   void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
     28   FunctionPass *createHexagonBitSimplify();
     29 }
     30 
     31 namespace {
     32   // Set of virtual registers, based on BitVector.
     33   struct RegisterSet : private BitVector {
     34     RegisterSet() : BitVector() {}
     35     explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
     36     RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
     37 
     38     using BitVector::clear;
     39     using BitVector::count;
     40 
     41     unsigned find_first() const {
     42       int First = BitVector::find_first();
     43       if (First < 0)
     44         return 0;
     45       return x2v(First);
     46     }
     47 
     48     unsigned find_next(unsigned Prev) const {
     49       int Next = BitVector::find_next(v2x(Prev));
     50       if (Next < 0)
     51         return 0;
     52       return x2v(Next);
     53     }
     54 
     55     RegisterSet &insert(unsigned R) {
     56       unsigned Idx = v2x(R);
     57       ensure(Idx);
     58       return static_cast<RegisterSet&>(BitVector::set(Idx));
     59     }
     60     RegisterSet &remove(unsigned R) {
     61       unsigned Idx = v2x(R);
     62       if (Idx >= size())
     63         return *this;
     64       return static_cast<RegisterSet&>(BitVector::reset(Idx));
     65     }
     66 
     67     RegisterSet &insert(const RegisterSet &Rs) {
     68       return static_cast<RegisterSet&>(BitVector::operator|=(Rs));
     69     }
     70     RegisterSet &remove(const RegisterSet &Rs) {
     71       return static_cast<RegisterSet&>(BitVector::reset(Rs));
     72     }
     73 
     74     reference operator[](unsigned R) {
     75       unsigned Idx = v2x(R);
     76       ensure(Idx);
     77       return BitVector::operator[](Idx);
     78     }
     79     bool operator[](unsigned R) const {
     80       unsigned Idx = v2x(R);
     81       assert(Idx < size());
     82       return BitVector::operator[](Idx);
     83     }
     84     bool has(unsigned R) const {
     85       unsigned Idx = v2x(R);
     86       if (Idx >= size())
     87         return false;
     88       return BitVector::test(Idx);
     89     }
     90 
     91     bool empty() const {
     92       return !BitVector::any();
     93     }
     94     bool includes(const RegisterSet &Rs) const {
     95       // A.BitVector::test(B)  <=>  A-B != {}
     96       return !Rs.BitVector::test(*this);
     97     }
     98     bool intersects(const RegisterSet &Rs) const {
     99       return BitVector::anyCommon(Rs);
    100     }
    101 
    102   private:
    103     void ensure(unsigned Idx) {
    104       if (size() <= Idx)
    105         resize(std::max(Idx+1, 32U));
    106     }
    107     static inline unsigned v2x(unsigned v) {
    108       return TargetRegisterInfo::virtReg2Index(v);
    109     }
    110     static inline unsigned x2v(unsigned x) {
    111       return TargetRegisterInfo::index2VirtReg(x);
    112     }
    113   };
    114 
    115 
    116   struct PrintRegSet {
    117     PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
    118       : RS(S), TRI(RI) {}
    119     friend raw_ostream &operator<< (raw_ostream &OS,
    120           const PrintRegSet &P);
    121   private:
    122     const RegisterSet &RS;
    123     const TargetRegisterInfo *TRI;
    124   };
    125 
    126   raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P)
    127     LLVM_ATTRIBUTE_UNUSED;
    128   raw_ostream &operator<< (raw_ostream &OS, const PrintRegSet &P) {
    129     OS << '{';
    130     for (unsigned R = P.RS.find_first(); R; R = P.RS.find_next(R))
    131       OS << ' ' << PrintReg(R, P.TRI);
    132     OS << " }";
    133     return OS;
    134   }
    135 }
    136 
    137 
    138 namespace {
    139   class Transformation;
    140 
    141   class HexagonBitSimplify : public MachineFunctionPass {
    142   public:
    143     static char ID;
    144     HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) {
    145       initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
    146     }
    147     virtual const char *getPassName() const {
    148       return "Hexagon bit simplification";
    149     }
    150     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    151       AU.addRequired<MachineDominatorTree>();
    152       AU.addPreserved<MachineDominatorTree>();
    153       MachineFunctionPass::getAnalysisUsage(AU);
    154     }
    155     virtual bool runOnMachineFunction(MachineFunction &MF);
    156 
    157     static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
    158     static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
    159     static bool isEqual(const BitTracker::RegisterCell &RC1, uint16_t B1,
    160         const BitTracker::RegisterCell &RC2, uint16_t B2, uint16_t W);
    161     static bool isZero(const BitTracker::RegisterCell &RC, uint16_t B,
    162         uint16_t W);
    163     static bool getConst(const BitTracker::RegisterCell &RC, uint16_t B,
    164         uint16_t W, uint64_t &U);
    165     static bool replaceReg(unsigned OldR, unsigned NewR,
    166         MachineRegisterInfo &MRI);
    167     static bool getSubregMask(const BitTracker::RegisterRef &RR,
    168         unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI);
    169     static bool replaceRegWithSub(unsigned OldR, unsigned NewR,
    170         unsigned NewSR, MachineRegisterInfo &MRI);
    171     static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
    172         unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
    173     static bool parseRegSequence(const MachineInstr &I,
    174         BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
    175 
    176     static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
    177         uint16_t Begin);
    178     static bool getUsedBits(unsigned Opc, unsigned OpN, BitVector &Bits,
    179         uint16_t Begin, const HexagonInstrInfo &HII);
    180 
    181     static const TargetRegisterClass *getFinalVRegClass(
    182         const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI);
    183     static bool isTransparentCopy(const BitTracker::RegisterRef &RD,
    184         const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI);
    185 
    186   private:
    187     MachineDominatorTree *MDT;
    188 
    189     bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
    190   };
    191 
    192   char HexagonBitSimplify::ID = 0;
    193   typedef HexagonBitSimplify HBS;
    194 
    195 
    196   // The purpose of this class is to provide a common facility to traverse
    197   // the function top-down or bottom-up via the dominator tree, and keep
    198   // track of the available registers.
    199   class Transformation {
    200   public:
    201     bool TopDown;
    202     Transformation(bool TD) : TopDown(TD) {}
    203     virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
    204     virtual ~Transformation() {}
    205   };
    206 }
    207 
    208 INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
    209       "Hexagon bit simplification", false, false)
    210 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    211 INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
    212       "Hexagon bit simplification", false, false)
    213 
    214 
    215 bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
    216       RegisterSet &AVs) {
    217   MachineDomTreeNode *N = MDT->getNode(&B);
    218   typedef GraphTraits<MachineDomTreeNode*> GTN;
    219   bool Changed = false;
    220 
    221   if (T.TopDown)
    222     Changed = T.processBlock(B, AVs);
    223 
    224   RegisterSet Defs;
    225   for (auto &I : B)
    226     getInstrDefs(I, Defs);
    227   RegisterSet NewAVs = AVs;
    228   NewAVs.insert(Defs);
    229 
    230   for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I) {
    231     MachineBasicBlock *SB = (*I)->getBlock();
    232     Changed |= visitBlock(*SB, T, NewAVs);
    233   }
    234   if (!T.TopDown)
    235     Changed |= T.processBlock(B, AVs);
    236 
    237   return Changed;
    238 }
    239 
    240 //
    241 // Utility functions:
    242 //
    243 void HexagonBitSimplify::getInstrDefs(const MachineInstr &MI,
    244       RegisterSet &Defs) {
    245   for (auto &Op : MI.operands()) {
    246     if (!Op.isReg() || !Op.isDef())
    247       continue;
    248     unsigned R = Op.getReg();
    249     if (!TargetRegisterInfo::isVirtualRegister(R))
    250       continue;
    251     Defs.insert(R);
    252   }
    253 }
    254 
    255 void HexagonBitSimplify::getInstrUses(const MachineInstr &MI,
    256       RegisterSet &Uses) {
    257   for (auto &Op : MI.operands()) {
    258     if (!Op.isReg() || !Op.isUse())
    259       continue;
    260     unsigned R = Op.getReg();
    261     if (!TargetRegisterInfo::isVirtualRegister(R))
    262       continue;
    263     Uses.insert(R);
    264   }
    265 }
    266 
    267 // Check if all the bits in range [B, E) in both cells are equal.
    268 bool HexagonBitSimplify::isEqual(const BitTracker::RegisterCell &RC1,
    269       uint16_t B1, const BitTracker::RegisterCell &RC2, uint16_t B2,
    270       uint16_t W) {
    271   for (uint16_t i = 0; i < W; ++i) {
    272     // If RC1[i] is "bottom", it cannot be proven equal to RC2[i].
    273     if (RC1[B1+i].Type == BitTracker::BitValue::Ref && RC1[B1+i].RefI.Reg == 0)
    274       return false;
    275     // Same for RC2[i].
    276     if (RC2[B2+i].Type == BitTracker::BitValue::Ref && RC2[B2+i].RefI.Reg == 0)
    277       return false;
    278     if (RC1[B1+i] != RC2[B2+i])
    279       return false;
    280   }
    281   return true;
    282 }
    283 
    284 bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
    285       uint16_t B, uint16_t W) {
    286   assert(B < RC.width() && B+W <= RC.width());
    287   for (uint16_t i = B; i < B+W; ++i)
    288     if (!RC[i].is(0))
    289       return false;
    290   return true;
    291 }
    292 
    293 
    294 bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
    295         uint16_t B, uint16_t W, uint64_t &U) {
    296   assert(B < RC.width() && B+W <= RC.width());
    297   int64_t T = 0;
    298   for (uint16_t i = B+W; i > B; --i) {
    299     const BitTracker::BitValue &BV = RC[i-1];
    300     T <<= 1;
    301     if (BV.is(1))
    302       T |= 1;
    303     else if (!BV.is(0))
    304       return false;
    305   }
    306   U = T;
    307   return true;
    308 }
    309 
    310 
    311 bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
    312       MachineRegisterInfo &MRI) {
    313   if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
    314       !TargetRegisterInfo::isVirtualRegister(NewR))
    315     return false;
    316   auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
    317   decltype(End) NextI;
    318   for (auto I = Begin; I != End; I = NextI) {
    319     NextI = std::next(I);
    320     I->setReg(NewR);
    321   }
    322   return Begin != End;
    323 }
    324 
    325 
    326 bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
    327       unsigned NewSR, MachineRegisterInfo &MRI) {
    328   if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
    329       !TargetRegisterInfo::isVirtualRegister(NewR))
    330     return false;
    331   auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
    332   decltype(End) NextI;
    333   for (auto I = Begin; I != End; I = NextI) {
    334     NextI = std::next(I);
    335     I->setReg(NewR);
    336     I->setSubReg(NewSR);
    337   }
    338   return Begin != End;
    339 }
    340 
    341 
    342 bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
    343       unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
    344   if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
    345       !TargetRegisterInfo::isVirtualRegister(NewR))
    346     return false;
    347   auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
    348   decltype(End) NextI;
    349   for (auto I = Begin; I != End; I = NextI) {
    350     NextI = std::next(I);
    351     if (I->getSubReg() != OldSR)
    352       continue;
    353     I->setReg(NewR);
    354     I->setSubReg(NewSR);
    355   }
    356   return Begin != End;
    357 }
    358 
    359 
    360 // For a register ref (pair Reg:Sub), set Begin to the position of the LSB
    361 // of Sub in Reg, and set Width to the size of Sub in bits. Return true,
    362 // if this succeeded, otherwise return false.
    363 bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
    364       unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
    365   const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
    366   if (RC == &Hexagon::IntRegsRegClass) {
    367     assert(RR.Sub == 0);
    368     Begin = 0;
    369     Width = 32;
    370     return true;
    371   }
    372   if (RC == &Hexagon::DoubleRegsRegClass) {
    373     if (RR.Sub == 0) {
    374       Begin = 0;
    375       Width = 64;
    376       return true;
    377     }
    378     assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg);
    379     Width = 32;
    380     Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32);
    381     return true;
    382   }
    383   return false;
    384 }
    385 
    386 
    387 // For a REG_SEQUENCE, set SL to the low subregister and SH to the high
    388 // subregister.
    389 bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
    390       BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) {
    391   assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
    392   unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
    393   assert(Sub1 != Sub2);
    394   if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) {
    395     SL = I.getOperand(1);
    396     SH = I.getOperand(3);
    397     return true;
    398   }
    399   if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) {
    400     SH = I.getOperand(1);
    401     SL = I.getOperand(3);
    402     return true;
    403   }
    404   return false;
    405 }
    406 
    407 
    408 // All stores (except 64-bit stores) take a 32-bit register as the source
    409 // of the value to be stored. If the instruction stores into a location
    410 // that is shorter than 32 bits, some bits of the source register are not
    411 // used. For each store instruction, calculate the set of used bits in
    412 // the source register, and set appropriate bits in Bits. Return true if
    413 // the bits are calculated, false otherwise.
    414 bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
    415       uint16_t Begin) {
    416   using namespace Hexagon;
    417 
    418   switch (Opc) {
    419     // Store byte
    420     case S2_storerb_io:           // memb(Rs32+#s11:0)=Rt32
    421     case S2_storerbnew_io:        // memb(Rs32+#s11:0)=Nt8.new
    422     case S2_pstorerbt_io:         // if (Pv4) memb(Rs32+#u6:0)=Rt32
    423     case S2_pstorerbf_io:         // if (!Pv4) memb(Rs32+#u6:0)=Rt32
    424     case S4_pstorerbtnew_io:      // if (Pv4.new) memb(Rs32+#u6:0)=Rt32
    425     case S4_pstorerbfnew_io:      // if (!Pv4.new) memb(Rs32+#u6:0)=Rt32
    426     case S2_pstorerbnewt_io:      // if (Pv4) memb(Rs32+#u6:0)=Nt8.new
    427     case S2_pstorerbnewf_io:      // if (!Pv4) memb(Rs32+#u6:0)=Nt8.new
    428     case S4_pstorerbnewtnew_io:   // if (Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    429     case S4_pstorerbnewfnew_io:   // if (!Pv4.new) memb(Rs32+#u6:0)=Nt8.new
    430     case S2_storerb_pi:           // memb(Rx32++#s4:0)=Rt32
    431     case S2_storerbnew_pi:        // memb(Rx32++#s4:0)=Nt8.new
    432     case S2_pstorerbt_pi:         // if (Pv4) memb(Rx32++#s4:0)=Rt32
    433     case S2_pstorerbf_pi:         // if (!Pv4) memb(Rx32++#s4:0)=Rt32
    434     case S2_pstorerbtnew_pi:      // if (Pv4.new) memb(Rx32++#s4:0)=Rt32
    435     case S2_pstorerbfnew_pi:      // if (!Pv4.new) memb(Rx32++#s4:0)=Rt32
    436     case S2_pstorerbnewt_pi:      // if (Pv4) memb(Rx32++#s4:0)=Nt8.new
    437     case S2_pstorerbnewf_pi:      // if (!Pv4) memb(Rx32++#s4:0)=Nt8.new
    438     case S2_pstorerbnewtnew_pi:   // if (Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    439     case S2_pstorerbnewfnew_pi:   // if (!Pv4.new) memb(Rx32++#s4:0)=Nt8.new
    440     case S4_storerb_ap:           // memb(Re32=#U6)=Rt32
    441     case S4_storerbnew_ap:        // memb(Re32=#U6)=Nt8.new
    442     case S2_storerb_pr:           // memb(Rx32++Mu2)=Rt32
    443     case S2_storerbnew_pr:        // memb(Rx32++Mu2)=Nt8.new
    444     case S4_storerb_ur:           // memb(Ru32<<#u2+#U6)=Rt32
    445     case S4_storerbnew_ur:        // memb(Ru32<<#u2+#U6)=Nt8.new
    446     case S2_storerb_pbr:          // memb(Rx32++Mu2:brev)=Rt32
    447     case S2_storerbnew_pbr:       // memb(Rx32++Mu2:brev)=Nt8.new
    448     case S2_storerb_pci:          // memb(Rx32++#s4:0:circ(Mu2))=Rt32
    449     case S2_storerbnew_pci:       // memb(Rx32++#s4:0:circ(Mu2))=Nt8.new
    450     case S2_storerb_pcr:          // memb(Rx32++I:circ(Mu2))=Rt32
    451     case S2_storerbnew_pcr:       // memb(Rx32++I:circ(Mu2))=Nt8.new
    452     case S4_storerb_rr:           // memb(Rs32+Ru32<<#u2)=Rt32
    453     case S4_storerbnew_rr:        // memb(Rs32+Ru32<<#u2)=Nt8.new
    454     case S4_pstorerbt_rr:         // if (Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    455     case S4_pstorerbf_rr:         // if (!Pv4) memb(Rs32+Ru32<<#u2)=Rt32
    456     case S4_pstorerbtnew_rr:      // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    457     case S4_pstorerbfnew_rr:      // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Rt32
    458     case S4_pstorerbnewt_rr:      // if (Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    459     case S4_pstorerbnewf_rr:      // if (!Pv4) memb(Rs32+Ru32<<#u2)=Nt8.new
    460     case S4_pstorerbnewtnew_rr:   // if (Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    461     case S4_pstorerbnewfnew_rr:   // if (!Pv4.new) memb(Rs32+Ru32<<#u2)=Nt8.new
    462     case S2_storerbgp:            // memb(gp+#u16:0)=Rt32
    463     case S2_storerbnewgp:         // memb(gp+#u16:0)=Nt8.new
    464     case S4_pstorerbt_abs:        // if (Pv4) memb(#u6)=Rt32
    465     case S4_pstorerbf_abs:        // if (!Pv4) memb(#u6)=Rt32
    466     case S4_pstorerbtnew_abs:     // if (Pv4.new) memb(#u6)=Rt32
    467     case S4_pstorerbfnew_abs:     // if (!Pv4.new) memb(#u6)=Rt32
    468     case S4_pstorerbnewt_abs:     // if (Pv4) memb(#u6)=Nt8.new
    469     case S4_pstorerbnewf_abs:     // if (!Pv4) memb(#u6)=Nt8.new
    470     case S4_pstorerbnewtnew_abs:  // if (Pv4.new) memb(#u6)=Nt8.new
    471     case S4_pstorerbnewfnew_abs:  // if (!Pv4.new) memb(#u6)=Nt8.new
    472       Bits.set(Begin, Begin+8);
    473       return true;
    474 
    475     // Store low half
    476     case S2_storerh_io:           // memh(Rs32+#s11:1)=Rt32
    477     case S2_storerhnew_io:        // memh(Rs32+#s11:1)=Nt8.new
    478     case S2_pstorerht_io:         // if (Pv4) memh(Rs32+#u6:1)=Rt32
    479     case S2_pstorerhf_io:         // if (!Pv4) memh(Rs32+#u6:1)=Rt32
    480     case S4_pstorerhtnew_io:      // if (Pv4.new) memh(Rs32+#u6:1)=Rt32
    481     case S4_pstorerhfnew_io:      // if (!Pv4.new) memh(Rs32+#u6:1)=Rt32
    482     case S2_pstorerhnewt_io:      // if (Pv4) memh(Rs32+#u6:1)=Nt8.new
    483     case S2_pstorerhnewf_io:      // if (!Pv4) memh(Rs32+#u6:1)=Nt8.new
    484     case S4_pstorerhnewtnew_io:   // if (Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    485     case S4_pstorerhnewfnew_io:   // if (!Pv4.new) memh(Rs32+#u6:1)=Nt8.new
    486     case S2_storerh_pi:           // memh(Rx32++#s4:1)=Rt32
    487     case S2_storerhnew_pi:        // memh(Rx32++#s4:1)=Nt8.new
    488     case S2_pstorerht_pi:         // if (Pv4) memh(Rx32++#s4:1)=Rt32
    489     case S2_pstorerhf_pi:         // if (!Pv4) memh(Rx32++#s4:1)=Rt32
    490     case S2_pstorerhtnew_pi:      // if (Pv4.new) memh(Rx32++#s4:1)=Rt32
    491     case S2_pstorerhfnew_pi:      // if (!Pv4.new) memh(Rx32++#s4:1)=Rt32
    492     case S2_pstorerhnewt_pi:      // if (Pv4) memh(Rx32++#s4:1)=Nt8.new
    493     case S2_pstorerhnewf_pi:      // if (!Pv4) memh(Rx32++#s4:1)=Nt8.new
    494     case S2_pstorerhnewtnew_pi:   // if (Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    495     case S2_pstorerhnewfnew_pi:   // if (!Pv4.new) memh(Rx32++#s4:1)=Nt8.new
    496     case S4_storerh_ap:           // memh(Re32=#U6)=Rt32
    497     case S4_storerhnew_ap:        // memh(Re32=#U6)=Nt8.new
    498     case S2_storerh_pr:           // memh(Rx32++Mu2)=Rt32
    499     case S2_storerhnew_pr:        // memh(Rx32++Mu2)=Nt8.new
    500     case S4_storerh_ur:           // memh(Ru32<<#u2+#U6)=Rt32
    501     case S4_storerhnew_ur:        // memh(Ru32<<#u2+#U6)=Nt8.new
    502     case S2_storerh_pbr:          // memh(Rx32++Mu2:brev)=Rt32
    503     case S2_storerhnew_pbr:       // memh(Rx32++Mu2:brev)=Nt8.new
    504     case S2_storerh_pci:          // memh(Rx32++#s4:1:circ(Mu2))=Rt32
    505     case S2_storerhnew_pci:       // memh(Rx32++#s4:1:circ(Mu2))=Nt8.new
    506     case S2_storerh_pcr:          // memh(Rx32++I:circ(Mu2))=Rt32
    507     case S2_storerhnew_pcr:       // memh(Rx32++I:circ(Mu2))=Nt8.new
    508     case S4_storerh_rr:           // memh(Rs32+Ru32<<#u2)=Rt32
    509     case S4_pstorerht_rr:         // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    510     case S4_pstorerhf_rr:         // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt32
    511     case S4_pstorerhtnew_rr:      // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    512     case S4_pstorerhfnew_rr:      // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt32
    513     case S4_storerhnew_rr:        // memh(Rs32+Ru32<<#u2)=Nt8.new
    514     case S4_pstorerhnewt_rr:      // if (Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    515     case S4_pstorerhnewf_rr:      // if (!Pv4) memh(Rs32+Ru32<<#u2)=Nt8.new
    516     case S4_pstorerhnewtnew_rr:   // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    517     case S4_pstorerhnewfnew_rr:   // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Nt8.new
    518     case S2_storerhgp:            // memh(gp+#u16:1)=Rt32
    519     case S2_storerhnewgp:         // memh(gp+#u16:1)=Nt8.new
    520     case S4_pstorerht_abs:        // if (Pv4) memh(#u6)=Rt32
    521     case S4_pstorerhf_abs:        // if (!Pv4) memh(#u6)=Rt32
    522     case S4_pstorerhtnew_abs:     // if (Pv4.new) memh(#u6)=Rt32
    523     case S4_pstorerhfnew_abs:     // if (!Pv4.new) memh(#u6)=Rt32
    524     case S4_pstorerhnewt_abs:     // if (Pv4) memh(#u6)=Nt8.new
    525     case S4_pstorerhnewf_abs:     // if (!Pv4) memh(#u6)=Nt8.new
    526     case S4_pstorerhnewtnew_abs:  // if (Pv4.new) memh(#u6)=Nt8.new
    527     case S4_pstorerhnewfnew_abs:  // if (!Pv4.new) memh(#u6)=Nt8.new
    528       Bits.set(Begin, Begin+16);
    529       return true;
    530 
    531     // Store high half
    532     case S2_storerf_io:           // memh(Rs32+#s11:1)=Rt.H32
    533     case S2_pstorerft_io:         // if (Pv4) memh(Rs32+#u6:1)=Rt.H32
    534     case S2_pstorerff_io:         // if (!Pv4) memh(Rs32+#u6:1)=Rt.H32
    535     case S4_pstorerftnew_io:      // if (Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    536     case S4_pstorerffnew_io:      // if (!Pv4.new) memh(Rs32+#u6:1)=Rt.H32
    537     case S2_storerf_pi:           // memh(Rx32++#s4:1)=Rt.H32
    538     case S2_pstorerft_pi:         // if (Pv4) memh(Rx32++#s4:1)=Rt.H32
    539     case S2_pstorerff_pi:         // if (!Pv4) memh(Rx32++#s4:1)=Rt.H32
    540     case S2_pstorerftnew_pi:      // if (Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    541     case S2_pstorerffnew_pi:      // if (!Pv4.new) memh(Rx32++#s4:1)=Rt.H32
    542     case S4_storerf_ap:           // memh(Re32=#U6)=Rt.H32
    543     case S2_storerf_pr:           // memh(Rx32++Mu2)=Rt.H32
    544     case S4_storerf_ur:           // memh(Ru32<<#u2+#U6)=Rt.H32
    545     case S2_storerf_pbr:          // memh(Rx32++Mu2:brev)=Rt.H32
    546     case S2_storerf_pci:          // memh(Rx32++#s4:1:circ(Mu2))=Rt.H32
    547     case S2_storerf_pcr:          // memh(Rx32++I:circ(Mu2))=Rt.H32
    548     case S4_storerf_rr:           // memh(Rs32+Ru32<<#u2)=Rt.H32
    549     case S4_pstorerft_rr:         // if (Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    550     case S4_pstorerff_rr:         // if (!Pv4) memh(Rs32+Ru32<<#u2)=Rt.H32
    551     case S4_pstorerftnew_rr:      // if (Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    552     case S4_pstorerffnew_rr:      // if (!Pv4.new) memh(Rs32+Ru32<<#u2)=Rt.H32
    553     case S2_storerfgp:            // memh(gp+#u16:1)=Rt.H32
    554     case S4_pstorerft_abs:        // if (Pv4) memh(#u6)=Rt.H32
    555     case S4_pstorerff_abs:        // if (!Pv4) memh(#u6)=Rt.H32
    556     case S4_pstorerftnew_abs:     // if (Pv4.new) memh(#u6)=Rt.H32
    557     case S4_pstorerffnew_abs:     // if (!Pv4.new) memh(#u6)=Rt.H32
    558       Bits.set(Begin+16, Begin+32);
    559       return true;
    560   }
    561 
    562   return false;
    563 }
    564 
    565 
    566 // For an instruction with opcode Opc, calculate the set of bits that it
    567 // uses in a register in operand OpN. This only calculates the set of used
    568 // bits for cases where it does not depend on any operands (as is the case
    569 // in shifts, for example). For concrete instructions from a program, the
    570 // operand may be a subregister of a larger register, while Bits would
    571 // correspond to the larger register in its entirety. Because of that,
    572 // the parameter Begin can be used to indicate which bit of Bits should be
    573 // considered the LSB of of the operand.
    574 bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
    575       BitVector &Bits, uint16_t Begin, const HexagonInstrInfo &HII) {
    576   using namespace Hexagon;
    577 
    578   const MCInstrDesc &D = HII.get(Opc);
    579   if (D.mayStore()) {
    580     if (OpN == D.getNumOperands()-1)
    581       return getUsedBitsInStore(Opc, Bits, Begin);
    582     return false;
    583   }
    584 
    585   switch (Opc) {
    586     // One register source. Used bits: R1[0-7].
    587     case A2_sxtb:
    588     case A2_zxtb:
    589     case A4_cmpbeqi:
    590     case A4_cmpbgti:
    591     case A4_cmpbgtui:
    592       if (OpN == 1) {
    593         Bits.set(Begin, Begin+8);
    594         return true;
    595       }
    596       break;
    597 
    598     // One register source. Used bits: R1[0-15].
    599     case A2_aslh:
    600     case A2_sxth:
    601     case A2_zxth:
    602     case A4_cmpheqi:
    603     case A4_cmphgti:
    604     case A4_cmphgtui:
    605       if (OpN == 1) {
    606         Bits.set(Begin, Begin+16);
    607         return true;
    608       }
    609       break;
    610 
    611     // One register source. Used bits: R1[16-31].
    612     case A2_asrh:
    613       if (OpN == 1) {
    614         Bits.set(Begin+16, Begin+32);
    615         return true;
    616       }
    617       break;
    618 
    619     // Two register sources. Used bits: R1[0-7], R2[0-7].
    620     case A4_cmpbeq:
    621     case A4_cmpbgt:
    622     case A4_cmpbgtu:
    623       if (OpN == 1) {
    624         Bits.set(Begin, Begin+8);
    625         return true;
    626       }
    627       break;
    628 
    629     // Two register sources. Used bits: R1[0-15], R2[0-15].
    630     case A4_cmpheq:
    631     case A4_cmphgt:
    632     case A4_cmphgtu:
    633     case A2_addh_h16_ll:
    634     case A2_addh_h16_sat_ll:
    635     case A2_addh_l16_ll:
    636     case A2_addh_l16_sat_ll:
    637     case A2_combine_ll:
    638     case A2_subh_h16_ll:
    639     case A2_subh_h16_sat_ll:
    640     case A2_subh_l16_ll:
    641     case A2_subh_l16_sat_ll:
    642     case M2_mpy_acc_ll_s0:
    643     case M2_mpy_acc_ll_s1:
    644     case M2_mpy_acc_sat_ll_s0:
    645     case M2_mpy_acc_sat_ll_s1:
    646     case M2_mpy_ll_s0:
    647     case M2_mpy_ll_s1:
    648     case M2_mpy_nac_ll_s0:
    649     case M2_mpy_nac_ll_s1:
    650     case M2_mpy_nac_sat_ll_s0:
    651     case M2_mpy_nac_sat_ll_s1:
    652     case M2_mpy_rnd_ll_s0:
    653     case M2_mpy_rnd_ll_s1:
    654     case M2_mpy_sat_ll_s0:
    655     case M2_mpy_sat_ll_s1:
    656     case M2_mpy_sat_rnd_ll_s0:
    657     case M2_mpy_sat_rnd_ll_s1:
    658     case M2_mpyd_acc_ll_s0:
    659     case M2_mpyd_acc_ll_s1:
    660     case M2_mpyd_ll_s0:
    661     case M2_mpyd_ll_s1:
    662     case M2_mpyd_nac_ll_s0:
    663     case M2_mpyd_nac_ll_s1:
    664     case M2_mpyd_rnd_ll_s0:
    665     case M2_mpyd_rnd_ll_s1:
    666     case M2_mpyu_acc_ll_s0:
    667     case M2_mpyu_acc_ll_s1:
    668     case M2_mpyu_ll_s0:
    669     case M2_mpyu_ll_s1:
    670     case M2_mpyu_nac_ll_s0:
    671     case M2_mpyu_nac_ll_s1:
    672     case M2_mpyud_acc_ll_s0:
    673     case M2_mpyud_acc_ll_s1:
    674     case M2_mpyud_ll_s0:
    675     case M2_mpyud_ll_s1:
    676     case M2_mpyud_nac_ll_s0:
    677     case M2_mpyud_nac_ll_s1:
    678       if (OpN == 1 || OpN == 2) {
    679         Bits.set(Begin, Begin+16);
    680         return true;
    681       }
    682       break;
    683 
    684     // Two register sources. Used bits: R1[0-15], R2[16-31].
    685     case A2_addh_h16_lh:
    686     case A2_addh_h16_sat_lh:
    687     case A2_combine_lh:
    688     case A2_subh_h16_lh:
    689     case A2_subh_h16_sat_lh:
    690     case M2_mpy_acc_lh_s0:
    691     case M2_mpy_acc_lh_s1:
    692     case M2_mpy_acc_sat_lh_s0:
    693     case M2_mpy_acc_sat_lh_s1:
    694     case M2_mpy_lh_s0:
    695     case M2_mpy_lh_s1:
    696     case M2_mpy_nac_lh_s0:
    697     case M2_mpy_nac_lh_s1:
    698     case M2_mpy_nac_sat_lh_s0:
    699     case M2_mpy_nac_sat_lh_s1:
    700     case M2_mpy_rnd_lh_s0:
    701     case M2_mpy_rnd_lh_s1:
    702     case M2_mpy_sat_lh_s0:
    703     case M2_mpy_sat_lh_s1:
    704     case M2_mpy_sat_rnd_lh_s0:
    705     case M2_mpy_sat_rnd_lh_s1:
    706     case M2_mpyd_acc_lh_s0:
    707     case M2_mpyd_acc_lh_s1:
    708     case M2_mpyd_lh_s0:
    709     case M2_mpyd_lh_s1:
    710     case M2_mpyd_nac_lh_s0:
    711     case M2_mpyd_nac_lh_s1:
    712     case M2_mpyd_rnd_lh_s0:
    713     case M2_mpyd_rnd_lh_s1:
    714     case M2_mpyu_acc_lh_s0:
    715     case M2_mpyu_acc_lh_s1:
    716     case M2_mpyu_lh_s0:
    717     case M2_mpyu_lh_s1:
    718     case M2_mpyu_nac_lh_s0:
    719     case M2_mpyu_nac_lh_s1:
    720     case M2_mpyud_acc_lh_s0:
    721     case M2_mpyud_acc_lh_s1:
    722     case M2_mpyud_lh_s0:
    723     case M2_mpyud_lh_s1:
    724     case M2_mpyud_nac_lh_s0:
    725     case M2_mpyud_nac_lh_s1:
    726     // These four are actually LH.
    727     case A2_addh_l16_hl:
    728     case A2_addh_l16_sat_hl:
    729     case A2_subh_l16_hl:
    730     case A2_subh_l16_sat_hl:
    731       if (OpN == 1) {
    732         Bits.set(Begin, Begin+16);
    733         return true;
    734       }
    735       if (OpN == 2) {
    736         Bits.set(Begin+16, Begin+32);
    737         return true;
    738       }
    739       break;
    740 
    741     // Two register sources, used bits: R1[16-31], R2[0-15].
    742     case A2_addh_h16_hl:
    743     case A2_addh_h16_sat_hl:
    744     case A2_combine_hl:
    745     case A2_subh_h16_hl:
    746     case A2_subh_h16_sat_hl:
    747     case M2_mpy_acc_hl_s0:
    748     case M2_mpy_acc_hl_s1:
    749     case M2_mpy_acc_sat_hl_s0:
    750     case M2_mpy_acc_sat_hl_s1:
    751     case M2_mpy_hl_s0:
    752     case M2_mpy_hl_s1:
    753     case M2_mpy_nac_hl_s0:
    754     case M2_mpy_nac_hl_s1:
    755     case M2_mpy_nac_sat_hl_s0:
    756     case M2_mpy_nac_sat_hl_s1:
    757     case M2_mpy_rnd_hl_s0:
    758     case M2_mpy_rnd_hl_s1:
    759     case M2_mpy_sat_hl_s0:
    760     case M2_mpy_sat_hl_s1:
    761     case M2_mpy_sat_rnd_hl_s0:
    762     case M2_mpy_sat_rnd_hl_s1:
    763     case M2_mpyd_acc_hl_s0:
    764     case M2_mpyd_acc_hl_s1:
    765     case M2_mpyd_hl_s0:
    766     case M2_mpyd_hl_s1:
    767     case M2_mpyd_nac_hl_s0:
    768     case M2_mpyd_nac_hl_s1:
    769     case M2_mpyd_rnd_hl_s0:
    770     case M2_mpyd_rnd_hl_s1:
    771     case M2_mpyu_acc_hl_s0:
    772     case M2_mpyu_acc_hl_s1:
    773     case M2_mpyu_hl_s0:
    774     case M2_mpyu_hl_s1:
    775     case M2_mpyu_nac_hl_s0:
    776     case M2_mpyu_nac_hl_s1:
    777     case M2_mpyud_acc_hl_s0:
    778     case M2_mpyud_acc_hl_s1:
    779     case M2_mpyud_hl_s0:
    780     case M2_mpyud_hl_s1:
    781     case M2_mpyud_nac_hl_s0:
    782     case M2_mpyud_nac_hl_s1:
    783       if (OpN == 1) {
    784         Bits.set(Begin+16, Begin+32);
    785         return true;
    786       }
    787       if (OpN == 2) {
    788         Bits.set(Begin, Begin+16);
    789         return true;
    790       }
    791       break;
    792 
    793     // Two register sources, used bits: R1[16-31], R2[16-31].
    794     case A2_addh_h16_hh:
    795     case A2_addh_h16_sat_hh:
    796     case A2_combine_hh:
    797     case A2_subh_h16_hh:
    798     case A2_subh_h16_sat_hh:
    799     case M2_mpy_acc_hh_s0:
    800     case M2_mpy_acc_hh_s1:
    801     case M2_mpy_acc_sat_hh_s0:
    802     case M2_mpy_acc_sat_hh_s1:
    803     case M2_mpy_hh_s0:
    804     case M2_mpy_hh_s1:
    805     case M2_mpy_nac_hh_s0:
    806     case M2_mpy_nac_hh_s1:
    807     case M2_mpy_nac_sat_hh_s0:
    808     case M2_mpy_nac_sat_hh_s1:
    809     case M2_mpy_rnd_hh_s0:
    810     case M2_mpy_rnd_hh_s1:
    811     case M2_mpy_sat_hh_s0:
    812     case M2_mpy_sat_hh_s1:
    813     case M2_mpy_sat_rnd_hh_s0:
    814     case M2_mpy_sat_rnd_hh_s1:
    815     case M2_mpyd_acc_hh_s0:
    816     case M2_mpyd_acc_hh_s1:
    817     case M2_mpyd_hh_s0:
    818     case M2_mpyd_hh_s1:
    819     case M2_mpyd_nac_hh_s0:
    820     case M2_mpyd_nac_hh_s1:
    821     case M2_mpyd_rnd_hh_s0:
    822     case M2_mpyd_rnd_hh_s1:
    823     case M2_mpyu_acc_hh_s0:
    824     case M2_mpyu_acc_hh_s1:
    825     case M2_mpyu_hh_s0:
    826     case M2_mpyu_hh_s1:
    827     case M2_mpyu_nac_hh_s0:
    828     case M2_mpyu_nac_hh_s1:
    829     case M2_mpyud_acc_hh_s0:
    830     case M2_mpyud_acc_hh_s1:
    831     case M2_mpyud_hh_s0:
    832     case M2_mpyud_hh_s1:
    833     case M2_mpyud_nac_hh_s0:
    834     case M2_mpyud_nac_hh_s1:
    835       if (OpN == 1 || OpN == 2) {
    836         Bits.set(Begin+16, Begin+32);
    837         return true;
    838       }
    839       break;
    840   }
    841 
    842   return false;
    843 }
    844 
    845 
    846 // Calculate the register class that matches Reg:Sub. For example, if
    847 // vreg1 is a double register, then vreg1:subreg_hireg would match "int"
    848 // register class.
    849 const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
    850       const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
    851   if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
    852     return nullptr;
    853   auto *RC = MRI.getRegClass(RR.Reg);
    854   if (RR.Sub == 0)
    855     return RC;
    856 
    857   auto VerifySR = [] (unsigned Sub) -> void {
    858     assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg);
    859   };
    860 
    861   switch (RC->getID()) {
    862     case Hexagon::DoubleRegsRegClassID:
    863       VerifySR(RR.Sub);
    864       return &Hexagon::IntRegsRegClass;
    865     case Hexagon::VecDblRegsRegClassID:
    866       VerifySR(RR.Sub);
    867       return &Hexagon::VectorRegsRegClass;
    868     case Hexagon::VecDblRegs128BRegClassID:
    869       VerifySR(RR.Sub);
    870       return &Hexagon::VectorRegs128BRegClass;
    871   }
    872   return nullptr;
    873 }
    874 
    875 
    876 // Check if RD could be replaced with RS at any possible use of RD.
    877 // For example a predicate register cannot be replaced with a integer
    878 // register, but a 64-bit register with a subregister can be replaced
    879 // with a 32-bit register.
    880 bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
    881       const BitTracker::RegisterRef &RS, MachineRegisterInfo &MRI) {
    882   if (!TargetRegisterInfo::isVirtualRegister(RD.Reg) ||
    883       !TargetRegisterInfo::isVirtualRegister(RS.Reg))
    884     return false;
    885   // Return false if one (or both) classes are nullptr.
    886   auto *DRC = getFinalVRegClass(RD, MRI);
    887   if (!DRC)
    888     return false;
    889 
    890   return DRC == getFinalVRegClass(RS, MRI);
    891 }
    892 
    893 
    894 //
    895 // Dead code elimination
    896 //
    897 namespace {
    898   class DeadCodeElimination {
    899   public:
    900     DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
    901       : MF(mf), HII(*MF.getSubtarget<HexagonSubtarget>().getInstrInfo()),
    902         MDT(mdt), MRI(mf.getRegInfo()) {}
    903 
    904     bool run() {
    905       return runOnNode(MDT.getRootNode());
    906     }
    907 
    908   private:
    909     bool isDead(unsigned R) const;
    910     bool runOnNode(MachineDomTreeNode *N);
    911 
    912     MachineFunction &MF;
    913     const HexagonInstrInfo &HII;
    914     MachineDominatorTree &MDT;
    915     MachineRegisterInfo &MRI;
    916   };
    917 }
    918 
    919 
    920 bool DeadCodeElimination::isDead(unsigned R) const {
    921   for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
    922     MachineInstr *UseI = I->getParent();
    923     if (UseI->isDebugValue())
    924       continue;
    925     if (UseI->isPHI()) {
    926       assert(!UseI->getOperand(0).getSubReg());
    927       unsigned DR = UseI->getOperand(0).getReg();
    928       if (DR == R)
    929         continue;
    930     }
    931     return false;
    932   }
    933   return true;
    934 }
    935 
    936 
    937 bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
    938   bool Changed = false;
    939   typedef GraphTraits<MachineDomTreeNode*> GTN;
    940   for (auto I = GTN::child_begin(N), E = GTN::child_end(N); I != E; ++I)
    941     Changed |= runOnNode(*I);
    942 
    943   MachineBasicBlock *B = N->getBlock();
    944   std::vector<MachineInstr*> Instrs;
    945   for (auto I = B->rbegin(), E = B->rend(); I != E; ++I)
    946     Instrs.push_back(&*I);
    947 
    948   for (auto MI : Instrs) {
    949     unsigned Opc = MI->getOpcode();
    950     // Do not touch lifetime markers. This is why the target-independent DCE
    951     // cannot be used.
    952     if (Opc == TargetOpcode::LIFETIME_START ||
    953         Opc == TargetOpcode::LIFETIME_END)
    954       continue;
    955     bool Store = false;
    956     if (MI->isInlineAsm())
    957       continue;
    958     // Delete PHIs if possible.
    959     if (!MI->isPHI() && !MI->isSafeToMove(nullptr, Store))
    960       continue;
    961 
    962     bool AllDead = true;
    963     SmallVector<unsigned,2> Regs;
    964     for (auto &Op : MI->operands()) {
    965       if (!Op.isReg() || !Op.isDef())
    966         continue;
    967       unsigned R = Op.getReg();
    968       if (!TargetRegisterInfo::isVirtualRegister(R) || !isDead(R)) {
    969         AllDead = false;
    970         break;
    971       }
    972       Regs.push_back(R);
    973     }
    974     if (!AllDead)
    975       continue;
    976 
    977     B->erase(MI);
    978     for (unsigned i = 0, n = Regs.size(); i != n; ++i)
    979       MRI.markUsesInDebugValueAsUndef(Regs[i]);
    980     Changed = true;
    981   }
    982 
    983   return Changed;
    984 }
    985 
    986 
    987 //
    988 // Eliminate redundant instructions
    989 //
    990 // This transformation will identify instructions where the output register
    991 // is the same as one of its input registers. This only works on instructions
    992 // that define a single register (unlike post-increment loads, for example).
    993 // The equality check is actually more detailed: the code calculates which
    994 // bits of the output are used, and only compares these bits with the input
    995 // registers.
    996 // If the output matches an input, the instruction is replaced with COPY.
    997 // The copies will be removed by another transformation.
    998 namespace {
    999   class RedundantInstrElimination : public Transformation {
   1000   public:
   1001     RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
   1002           MachineRegisterInfo &mri)
   1003         : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
   1004     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
   1005   private:
   1006     bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
   1007           unsigned &LostB, unsigned &LostE);
   1008     bool isLossyShiftRight(const MachineInstr &MI, unsigned OpN,
   1009           unsigned &LostB, unsigned &LostE);
   1010     bool computeUsedBits(unsigned Reg, BitVector &Bits);
   1011     bool computeUsedBits(const MachineInstr &MI, unsigned OpN, BitVector &Bits,
   1012           uint16_t Begin);
   1013     bool usedBitsEqual(BitTracker::RegisterRef RD, BitTracker::RegisterRef RS);
   1014 
   1015     const HexagonInstrInfo &HII;
   1016     MachineRegisterInfo &MRI;
   1017     BitTracker &BT;
   1018   };
   1019 }
   1020 
   1021 
   1022 // Check if the instruction is a lossy shift left, where the input being
   1023 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
   1024 // of bit indices that are lost.
   1025 bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
   1026       unsigned OpN, unsigned &LostB, unsigned &LostE) {
   1027   using namespace Hexagon;
   1028   unsigned Opc = MI.getOpcode();
   1029   unsigned ImN, RegN, Width;
   1030   switch (Opc) {
   1031     case S2_asl_i_p:
   1032       ImN = 2;
   1033       RegN = 1;
   1034       Width = 64;
   1035       break;
   1036     case S2_asl_i_p_acc:
   1037     case S2_asl_i_p_and:
   1038     case S2_asl_i_p_nac:
   1039     case S2_asl_i_p_or:
   1040     case S2_asl_i_p_xacc:
   1041       ImN = 3;
   1042       RegN = 2;
   1043       Width = 64;
   1044       break;
   1045     case S2_asl_i_r:
   1046       ImN = 2;
   1047       RegN = 1;
   1048       Width = 32;
   1049       break;
   1050     case S2_addasl_rrri:
   1051     case S4_andi_asl_ri:
   1052     case S4_ori_asl_ri:
   1053     case S4_addi_asl_ri:
   1054     case S4_subi_asl_ri:
   1055     case S2_asl_i_r_acc:
   1056     case S2_asl_i_r_and:
   1057     case S2_asl_i_r_nac:
   1058     case S2_asl_i_r_or:
   1059     case S2_asl_i_r_sat:
   1060     case S2_asl_i_r_xacc:
   1061       ImN = 3;
   1062       RegN = 2;
   1063       Width = 32;
   1064       break;
   1065     default:
   1066       return false;
   1067   }
   1068 
   1069   if (RegN != OpN)
   1070     return false;
   1071 
   1072   assert(MI.getOperand(ImN).isImm());
   1073   unsigned S = MI.getOperand(ImN).getImm();
   1074   if (S == 0)
   1075     return false;
   1076   LostB = Width-S;
   1077   LostE = Width;
   1078   return true;
   1079 }
   1080 
   1081 
   1082 // Check if the instruction is a lossy shift right, where the input being
   1083 // shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
   1084 // of bit indices that are lost.
   1085 bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
   1086       unsigned OpN, unsigned &LostB, unsigned &LostE) {
   1087   using namespace Hexagon;
   1088   unsigned Opc = MI.getOpcode();
   1089   unsigned ImN, RegN;
   1090   switch (Opc) {
   1091     case S2_asr_i_p:
   1092     case S2_lsr_i_p:
   1093       ImN = 2;
   1094       RegN = 1;
   1095       break;
   1096     case S2_asr_i_p_acc:
   1097     case S2_asr_i_p_and:
   1098     case S2_asr_i_p_nac:
   1099     case S2_asr_i_p_or:
   1100     case S2_lsr_i_p_acc:
   1101     case S2_lsr_i_p_and:
   1102     case S2_lsr_i_p_nac:
   1103     case S2_lsr_i_p_or:
   1104     case S2_lsr_i_p_xacc:
   1105       ImN = 3;
   1106       RegN = 2;
   1107       break;
   1108     case S2_asr_i_r:
   1109     case S2_lsr_i_r:
   1110       ImN = 2;
   1111       RegN = 1;
   1112       break;
   1113     case S4_andi_lsr_ri:
   1114     case S4_ori_lsr_ri:
   1115     case S4_addi_lsr_ri:
   1116     case S4_subi_lsr_ri:
   1117     case S2_asr_i_r_acc:
   1118     case S2_asr_i_r_and:
   1119     case S2_asr_i_r_nac:
   1120     case S2_asr_i_r_or:
   1121     case S2_lsr_i_r_acc:
   1122     case S2_lsr_i_r_and:
   1123     case S2_lsr_i_r_nac:
   1124     case S2_lsr_i_r_or:
   1125     case S2_lsr_i_r_xacc:
   1126       ImN = 3;
   1127       RegN = 2;
   1128       break;
   1129 
   1130     default:
   1131       return false;
   1132   }
   1133 
   1134   if (RegN != OpN)
   1135     return false;
   1136 
   1137   assert(MI.getOperand(ImN).isImm());
   1138   unsigned S = MI.getOperand(ImN).getImm();
   1139   LostB = 0;
   1140   LostE = S;
   1141   return true;
   1142 }
   1143 
   1144 
   1145 // Calculate the bit vector that corresponds to the used bits of register Reg.
   1146 // The vector Bits has the same size, as the size of Reg in bits. If the cal-
   1147 // culation fails (i.e. the used bits are unknown), it returns false. Other-
   1148 // wise, it returns true and sets the corresponding bits in Bits.
   1149 bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
   1150   BitVector Used(Bits.size());
   1151   RegisterSet Visited;
   1152   std::vector<unsigned> Pending;
   1153   Pending.push_back(Reg);
   1154 
   1155   for (unsigned i = 0; i < Pending.size(); ++i) {
   1156     unsigned R = Pending[i];
   1157     if (Visited.has(R))
   1158       continue;
   1159     Visited.insert(R);
   1160     for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
   1161       BitTracker::RegisterRef UR = *I;
   1162       unsigned B, W;
   1163       if (!HBS::getSubregMask(UR, B, W, MRI))
   1164         return false;
   1165       MachineInstr &UseI = *I->getParent();
   1166       if (UseI.isPHI() || UseI.isCopy()) {
   1167         unsigned DefR = UseI.getOperand(0).getReg();
   1168         if (!TargetRegisterInfo::isVirtualRegister(DefR))
   1169           return false;
   1170         Pending.push_back(DefR);
   1171       } else {
   1172         if (!computeUsedBits(UseI, I.getOperandNo(), Used, B))
   1173           return false;
   1174       }
   1175     }
   1176   }
   1177   Bits |= Used;
   1178   return true;
   1179 }
   1180 
   1181 
   1182 // Calculate the bits used by instruction MI in a register in operand OpN.
   1183 // Return true/false if the calculation succeeds/fails. If is succeeds, set
   1184 // used bits in Bits. This function does not reset any bits in Bits, so
   1185 // subsequent calls over different instructions will result in the union
   1186 // of the used bits in all these instructions.
   1187 // The register in question may be used with a sub-register, whereas Bits
   1188 // holds the bits for the entire register. To keep track of that, the
   1189 // argument Begin indicates where in Bits is the lowest-significant bit
   1190 // of the register used in operand OpN. For example, in instruction:
   1191 //   vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
   1192 // the operand 1 is a 32-bit register, which happens to be a subregister
   1193 // of the 64-bit register vreg2, and that subregister starts at position 32.
   1194 // In this case Begin=32, since Bits[32] would be the lowest-significant bit
   1195 // of vreg2:subreg_hireg.
   1196 bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
   1197       unsigned OpN, BitVector &Bits, uint16_t Begin) {
   1198   unsigned Opc = MI.getOpcode();
   1199   BitVector T(Bits.size());
   1200   bool GotBits = HBS::getUsedBits(Opc, OpN, T, Begin, HII);
   1201   // Even if we don't have bits yet, we could still provide some information
   1202   // if the instruction is a lossy shift: the lost bits will be marked as
   1203   // not used.
   1204   unsigned LB, LE;
   1205   if (isLossyShiftLeft(MI, OpN, LB, LE) || isLossyShiftRight(MI, OpN, LB, LE)) {
   1206     assert(MI.getOperand(OpN).isReg());
   1207     BitTracker::RegisterRef RR = MI.getOperand(OpN);
   1208     const TargetRegisterClass *RC = HBS::getFinalVRegClass(RR, MRI);
   1209     uint16_t Width = RC->getSize()*8;
   1210 
   1211     if (!GotBits)
   1212       T.set(Begin, Begin+Width);
   1213     assert(LB <= LE && LB < Width && LE <= Width);
   1214     T.reset(Begin+LB, Begin+LE);
   1215     GotBits = true;
   1216   }
   1217   if (GotBits)
   1218     Bits |= T;
   1219   return GotBits;
   1220 }
   1221 
   1222 
   1223 // Calculates the used bits in RD ("defined register"), and checks if these
   1224 // bits in RS ("used register") and RD are identical.
   1225 bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
   1226       BitTracker::RegisterRef RS) {
   1227   const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
   1228   const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
   1229 
   1230   unsigned DB, DW;
   1231   if (!HBS::getSubregMask(RD, DB, DW, MRI))
   1232     return false;
   1233   unsigned SB, SW;
   1234   if (!HBS::getSubregMask(RS, SB, SW, MRI))
   1235     return false;
   1236   if (SW != DW)
   1237     return false;
   1238 
   1239   BitVector Used(DC.width());
   1240   if (!computeUsedBits(RD.Reg, Used))
   1241     return false;
   1242 
   1243   for (unsigned i = 0; i != DW; ++i)
   1244     if (Used[i+DB] && DC[DB+i] != SC[SB+i])
   1245       return false;
   1246   return true;
   1247 }
   1248 
   1249 
   1250 bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
   1251       const RegisterSet&) {
   1252   bool Changed = false;
   1253 
   1254   for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
   1255     NextI = std::next(I);
   1256     MachineInstr *MI = &*I;
   1257 
   1258     if (MI->getOpcode() == TargetOpcode::COPY)
   1259       continue;
   1260     if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
   1261       continue;
   1262     unsigned NumD = MI->getDesc().getNumDefs();
   1263     if (NumD != 1)
   1264       continue;
   1265 
   1266     BitTracker::RegisterRef RD = MI->getOperand(0);
   1267     if (!BT.has(RD.Reg))
   1268       continue;
   1269     const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
   1270     auto At = MI->isPHI() ? B.getFirstNonPHI()
   1271                           : MachineBasicBlock::iterator(MI);
   1272 
   1273     // Find a source operand that is equal to the result.
   1274     for (auto &Op : MI->uses()) {
   1275       if (!Op.isReg())
   1276         continue;
   1277       BitTracker::RegisterRef RS = Op;
   1278       if (!BT.has(RS.Reg))
   1279         continue;
   1280       if (!HBS::isTransparentCopy(RD, RS, MRI))
   1281         continue;
   1282 
   1283       unsigned BN, BW;
   1284       if (!HBS::getSubregMask(RS, BN, BW, MRI))
   1285         continue;
   1286 
   1287       const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
   1288       if (!usedBitsEqual(RD, RS) && !HBS::isEqual(DC, 0, SC, BN, BW))
   1289         continue;
   1290 
   1291       // If found, replace the instruction with a COPY.
   1292       const DebugLoc &DL = MI->getDebugLoc();
   1293       const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
   1294       unsigned NewR = MRI.createVirtualRegister(FRC);
   1295       BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
   1296           .addReg(RS.Reg, 0, RS.Sub);
   1297       HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
   1298       BT.put(BitTracker::RegisterRef(NewR), SC);
   1299       Changed = true;
   1300       break;
   1301     }
   1302   }
   1303 
   1304   return Changed;
   1305 }
   1306 
   1307 
   1308 //
   1309 // Const generation
   1310 //
   1311 // Recognize instructions that produce constant values known at compile-time.
   1312 // Replace them with register definitions that load these constants directly.
   1313 namespace {
   1314   class ConstGeneration : public Transformation {
   1315   public:
   1316     ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
   1317         MachineRegisterInfo &mri)
   1318       : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
   1319     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
   1320   private:
   1321     bool isTfrConst(const MachineInstr &MI) const;
   1322     bool isConst(unsigned R, int64_t &V) const;
   1323     unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
   1324         MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
   1325 
   1326     const HexagonInstrInfo &HII;
   1327     MachineRegisterInfo &MRI;
   1328     BitTracker &BT;
   1329   };
   1330 }
   1331 
   1332 bool ConstGeneration::isConst(unsigned R, int64_t &C) const {
   1333   if (!BT.has(R))
   1334     return false;
   1335   const BitTracker::RegisterCell &RC = BT.lookup(R);
   1336   int64_t T = 0;
   1337   for (unsigned i = RC.width(); i > 0; --i) {
   1338     const BitTracker::BitValue &V = RC[i-1];
   1339     T <<= 1;
   1340     if (V.is(1))
   1341       T |= 1;
   1342     else if (!V.is(0))
   1343       return false;
   1344   }
   1345   C = T;
   1346   return true;
   1347 }
   1348 
   1349 bool ConstGeneration::isTfrConst(const MachineInstr &MI) const {
   1350   unsigned Opc = MI.getOpcode();
   1351   switch (Opc) {
   1352     case Hexagon::A2_combineii:
   1353     case Hexagon::A4_combineii:
   1354     case Hexagon::A2_tfrsi:
   1355     case Hexagon::A2_tfrpi:
   1356     case Hexagon::TFR_PdTrue:
   1357     case Hexagon::TFR_PdFalse:
   1358     case Hexagon::CONST32_Int_Real:
   1359     case Hexagon::CONST64_Int_Real:
   1360       return true;
   1361   }
   1362   return false;
   1363 }
   1364 
   1365 
   1366 // Generate a transfer-immediate instruction that is appropriate for the
   1367 // register class and the actual value being transferred.
   1368 unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
   1369       MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL) {
   1370   unsigned Reg = MRI.createVirtualRegister(RC);
   1371   if (RC == &Hexagon::IntRegsRegClass) {
   1372     BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrsi), Reg)
   1373         .addImm(int32_t(C));
   1374     return Reg;
   1375   }
   1376 
   1377   if (RC == &Hexagon::DoubleRegsRegClass) {
   1378     if (isInt<8>(C)) {
   1379       BuildMI(B, At, DL, HII.get(Hexagon::A2_tfrpi), Reg)
   1380           .addImm(C);
   1381       return Reg;
   1382     }
   1383 
   1384     unsigned Lo = Lo_32(C), Hi = Hi_32(C);
   1385     if (isInt<8>(Lo) || isInt<8>(Hi)) {
   1386       unsigned Opc = isInt<8>(Lo) ? Hexagon::A2_combineii
   1387                                   : Hexagon::A4_combineii;
   1388       BuildMI(B, At, DL, HII.get(Opc), Reg)
   1389           .addImm(int32_t(Hi))
   1390           .addImm(int32_t(Lo));
   1391       return Reg;
   1392     }
   1393 
   1394     BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg)
   1395         .addImm(C);
   1396     return Reg;
   1397   }
   1398 
   1399   if (RC == &Hexagon::PredRegsRegClass) {
   1400     unsigned Opc;
   1401     if (C == 0)
   1402       Opc = Hexagon::TFR_PdFalse;
   1403     else if ((C & 0xFF) == 0xFF)
   1404       Opc = Hexagon::TFR_PdTrue;
   1405     else
   1406       return 0;
   1407     BuildMI(B, At, DL, HII.get(Opc), Reg);
   1408     return Reg;
   1409   }
   1410 
   1411   return 0;
   1412 }
   1413 
   1414 
   1415 bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
   1416   bool Changed = false;
   1417   RegisterSet Defs;
   1418 
   1419   for (auto I = B.begin(), E = B.end(); I != E; ++I) {
   1420     if (isTfrConst(*I))
   1421       continue;
   1422     Defs.clear();
   1423     HBS::getInstrDefs(*I, Defs);
   1424     if (Defs.count() != 1)
   1425       continue;
   1426     unsigned DR = Defs.find_first();
   1427     if (!TargetRegisterInfo::isVirtualRegister(DR))
   1428       continue;
   1429     int64_t C;
   1430     if (isConst(DR, C)) {
   1431       DebugLoc DL = I->getDebugLoc();
   1432       auto At = I->isPHI() ? B.getFirstNonPHI() : I;
   1433       unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
   1434       if (ImmReg) {
   1435         HBS::replaceReg(DR, ImmReg, MRI);
   1436         BT.put(ImmReg, BT.lookup(DR));
   1437         Changed = true;
   1438       }
   1439     }
   1440   }
   1441   return Changed;
   1442 }
   1443 
   1444 
   1445 //
   1446 // Copy generation
   1447 //
   1448 // Identify pairs of available registers which hold identical values.
   1449 // In such cases, only one of them needs to be calculated, the other one
   1450 // will be defined as a copy of the first.
   1451 //
   1452 // Copy propagation
   1453 //
   1454 // Eliminate register copies RD = RS, by replacing the uses of RD with
   1455 // with uses of RS.
   1456 namespace {
   1457   class CopyGeneration : public Transformation {
   1458   public:
   1459     CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
   1460         MachineRegisterInfo &mri)
   1461       : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
   1462     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
   1463   private:
   1464     bool findMatch(const BitTracker::RegisterRef &Inp,
   1465         BitTracker::RegisterRef &Out, const RegisterSet &AVs);
   1466 
   1467     const HexagonInstrInfo &HII;
   1468     MachineRegisterInfo &MRI;
   1469     BitTracker &BT;
   1470   };
   1471 
   1472   class CopyPropagation : public Transformation {
   1473   public:
   1474     CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
   1475         : Transformation(false), MRI(mri) {}
   1476     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
   1477     static bool isCopyReg(unsigned Opc);
   1478   private:
   1479     bool propagateRegCopy(MachineInstr &MI);
   1480 
   1481     MachineRegisterInfo &MRI;
   1482   };
   1483 
   1484 }
   1485 
   1486 
   1487 /// Check if there is a register in AVs that is identical to Inp. If so,
   1488 /// set Out to the found register. The output may be a pair Reg:Sub.
   1489 bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
   1490       BitTracker::RegisterRef &Out, const RegisterSet &AVs) {
   1491   if (!BT.has(Inp.Reg))
   1492     return false;
   1493   const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
   1494   unsigned B, W;
   1495   if (!HBS::getSubregMask(Inp, B, W, MRI))
   1496     return false;
   1497 
   1498   for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
   1499     if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
   1500       continue;
   1501     const BitTracker::RegisterCell &RC = BT.lookup(R);
   1502     unsigned RW = RC.width();
   1503     if (W == RW) {
   1504       if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
   1505         continue;
   1506       if (!HBS::isEqual(InpRC, B, RC, 0, W))
   1507         continue;
   1508       Out.Reg = R;
   1509       Out.Sub = 0;
   1510       return true;
   1511     }
   1512     // Check if there is a super-register, whose part (with a subregister)
   1513     // is equal to the input.
   1514     // Only do double registers for now.
   1515     if (W*2 != RW)
   1516       continue;
   1517     if (MRI.getRegClass(R) != &Hexagon::DoubleRegsRegClass)
   1518       continue;
   1519 
   1520     if (HBS::isEqual(InpRC, B, RC, 0, W))
   1521       Out.Sub = Hexagon::subreg_loreg;
   1522     else if (HBS::isEqual(InpRC, B, RC, W, W))
   1523       Out.Sub = Hexagon::subreg_hireg;
   1524     else
   1525       continue;
   1526     Out.Reg = R;
   1527     return true;
   1528   }
   1529   return false;
   1530 }
   1531 
   1532 
   1533 bool CopyGeneration::processBlock(MachineBasicBlock &B,
   1534       const RegisterSet &AVs) {
   1535   RegisterSet AVB(AVs);
   1536   bool Changed = false;
   1537   RegisterSet Defs;
   1538 
   1539   for (auto I = B.begin(), E = B.end(), NextI = I; I != E;
   1540        ++I, AVB.insert(Defs)) {
   1541     NextI = std::next(I);
   1542     Defs.clear();
   1543     HBS::getInstrDefs(*I, Defs);
   1544 
   1545     unsigned Opc = I->getOpcode();
   1546     if (CopyPropagation::isCopyReg(Opc))
   1547       continue;
   1548 
   1549     for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
   1550       BitTracker::RegisterRef MR;
   1551       if (!findMatch(R, MR, AVB))
   1552         continue;
   1553       DebugLoc DL = I->getDebugLoc();
   1554       auto *FRC = HBS::getFinalVRegClass(MR, MRI);
   1555       unsigned NewR = MRI.createVirtualRegister(FRC);
   1556       auto At = I->isPHI() ? B.getFirstNonPHI() : I;
   1557       BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
   1558         .addReg(MR.Reg, 0, MR.Sub);
   1559       BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
   1560     }
   1561   }
   1562 
   1563   return Changed;
   1564 }
   1565 
   1566 
   1567 bool CopyPropagation::isCopyReg(unsigned Opc) {
   1568   switch (Opc) {
   1569     case TargetOpcode::COPY:
   1570     case TargetOpcode::REG_SEQUENCE:
   1571     case Hexagon::A2_tfr:
   1572     case Hexagon::A2_tfrp:
   1573     case Hexagon::A2_combinew:
   1574     case Hexagon::A4_combineir:
   1575     case Hexagon::A4_combineri:
   1576       return true;
   1577     default:
   1578       break;
   1579   }
   1580   return false;
   1581 }
   1582 
   1583 
   1584 bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
   1585   bool Changed = false;
   1586   unsigned Opc = MI.getOpcode();
   1587   BitTracker::RegisterRef RD = MI.getOperand(0);
   1588   assert(MI.getOperand(0).getSubReg() == 0);
   1589 
   1590   switch (Opc) {
   1591     case TargetOpcode::COPY:
   1592     case Hexagon::A2_tfr:
   1593     case Hexagon::A2_tfrp: {
   1594       BitTracker::RegisterRef RS = MI.getOperand(1);
   1595       if (!HBS::isTransparentCopy(RD, RS, MRI))
   1596         break;
   1597       if (RS.Sub != 0)
   1598         Changed = HBS::replaceRegWithSub(RD.Reg, RS.Reg, RS.Sub, MRI);
   1599       else
   1600         Changed = HBS::replaceReg(RD.Reg, RS.Reg, MRI);
   1601       break;
   1602     }
   1603     case TargetOpcode::REG_SEQUENCE: {
   1604       BitTracker::RegisterRef SL, SH;
   1605       if (HBS::parseRegSequence(MI, SL, SH)) {
   1606         Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
   1607                                          SL.Reg, SL.Sub, MRI);
   1608         Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
   1609                                           SH.Reg, SH.Sub, MRI);
   1610       }
   1611       break;
   1612     }
   1613     case Hexagon::A2_combinew: {
   1614       BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
   1615       Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
   1616                                        RL.Reg, RL.Sub, MRI);
   1617       Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
   1618                                         RH.Reg, RH.Sub, MRI);
   1619       break;
   1620     }
   1621     case Hexagon::A4_combineir:
   1622     case Hexagon::A4_combineri: {
   1623       unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
   1624       unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
   1625                                                     : Hexagon::subreg_hireg;
   1626       BitTracker::RegisterRef RS = MI.getOperand(SrcX);
   1627       Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
   1628       break;
   1629     }
   1630   }
   1631   return Changed;
   1632 }
   1633 
   1634 
   1635 bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
   1636   std::vector<MachineInstr*> Instrs;
   1637   for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
   1638     Instrs.push_back(&*I);
   1639 
   1640   bool Changed = false;
   1641   for (auto I : Instrs) {
   1642     unsigned Opc = I->getOpcode();
   1643     if (!CopyPropagation::isCopyReg(Opc))
   1644       continue;
   1645     Changed |= propagateRegCopy(*I);
   1646   }
   1647 
   1648   return Changed;
   1649 }
   1650 
   1651 
   1652 //
   1653 // Bit simplification
   1654 //
   1655 // Recognize patterns that can be simplified and replace them with the
   1656 // simpler forms.
   1657 // This is by no means complete
   1658 namespace {
   1659   class BitSimplification : public Transformation {
   1660   public:
   1661     BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
   1662         MachineRegisterInfo &mri)
   1663       : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
   1664     bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
   1665   private:
   1666     struct RegHalf : public BitTracker::RegisterRef {
   1667       bool Low;  // Low/High halfword.
   1668     };
   1669 
   1670     bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
   1671           unsigned B, RegHalf &RH);
   1672 
   1673     bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
   1674           BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
   1675     unsigned getCombineOpcode(bool HLow, bool LLow);
   1676 
   1677     bool genStoreUpperHalf(MachineInstr *MI);
   1678     bool genStoreImmediate(MachineInstr *MI);
   1679     bool genPackhl(MachineInstr *MI, BitTracker::RegisterRef RD,
   1680           const BitTracker::RegisterCell &RC);
   1681     bool genExtractHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
   1682           const BitTracker::RegisterCell &RC);
   1683     bool genCombineHalf(MachineInstr *MI, BitTracker::RegisterRef RD,
   1684           const BitTracker::RegisterCell &RC);
   1685     bool genExtractLow(MachineInstr *MI, BitTracker::RegisterRef RD,
   1686           const BitTracker::RegisterCell &RC);
   1687     bool simplifyTstbit(MachineInstr *MI, BitTracker::RegisterRef RD,
   1688           const BitTracker::RegisterCell &RC);
   1689 
   1690     const HexagonInstrInfo &HII;
   1691     MachineRegisterInfo &MRI;
   1692     BitTracker &BT;
   1693   };
   1694 }
   1695 
   1696 
   1697 // Check if the bits [B..B+16) in register cell RC form a valid halfword,
   1698 // i.e. [0..16), [16..32), etc. of some register. If so, return true and
   1699 // set the information about the found register in RH.
   1700 bool BitSimplification::matchHalf(unsigned SelfR,
   1701       const BitTracker::RegisterCell &RC, unsigned B, RegHalf &RH) {
   1702   // XXX This could be searching in the set of available registers, in case
   1703   // the match is not exact.
   1704 
   1705   // Match 16-bit chunks, where the RC[B..B+15] references exactly one
   1706   // register and all the bits B..B+15 match between RC and the register.
   1707   // This is meant to match "v1[0-15]", where v1 = { [0]:0 [1-15]:v1... },
   1708   // and RC = { [0]:0 [1-15]:v1[1-15]... }.
   1709   bool Low = false;
   1710   unsigned I = B;
   1711   while (I < B+16 && RC[I].num())
   1712     I++;
   1713   if (I == B+16)
   1714     return false;
   1715 
   1716   unsigned Reg = RC[I].RefI.Reg;
   1717   unsigned P = RC[I].RefI.Pos;    // The RefI.Pos will be advanced by I-B.
   1718   if (P < I-B)
   1719     return false;
   1720   unsigned Pos = P - (I-B);
   1721 
   1722   if (Reg == 0 || Reg == SelfR)    // Don't match "self".
   1723     return false;
   1724   if (!TargetRegisterInfo::isVirtualRegister(Reg))
   1725     return false;
   1726   if (!BT.has(Reg))
   1727     return false;
   1728 
   1729   const BitTracker::RegisterCell &SC = BT.lookup(Reg);
   1730   if (Pos+16 > SC.width())
   1731     return false;
   1732 
   1733   for (unsigned i = 0; i < 16; ++i) {
   1734     const BitTracker::BitValue &RV = RC[i+B];
   1735     if (RV.Type == BitTracker::BitValue::Ref) {
   1736       if (RV.RefI.Reg != Reg)
   1737         return false;
   1738       if (RV.RefI.Pos != i+Pos)
   1739         return false;
   1740       continue;
   1741     }
   1742     if (RC[i+B] != SC[i+Pos])
   1743       return false;
   1744   }
   1745 
   1746   unsigned Sub = 0;
   1747   switch (Pos) {
   1748     case 0:
   1749       Sub = Hexagon::subreg_loreg;
   1750       Low = true;
   1751       break;
   1752     case 16:
   1753       Sub = Hexagon::subreg_loreg;
   1754       Low = false;
   1755       break;
   1756     case 32:
   1757       Sub = Hexagon::subreg_hireg;
   1758       Low = true;
   1759       break;
   1760     case 48:
   1761       Sub = Hexagon::subreg_hireg;
   1762       Low = false;
   1763       break;
   1764     default:
   1765       return false;
   1766   }
   1767 
   1768   RH.Reg = Reg;
   1769   RH.Sub = Sub;
   1770   RH.Low = Low;
   1771   // If the subregister is not valid with the register, set it to 0.
   1772   if (!HBS::getFinalVRegClass(RH, MRI))
   1773     RH.Sub = 0;
   1774 
   1775   return true;
   1776 }
   1777 
   1778 
   1779 // Check if RC matches the pattern of a S2_packhl. If so, return true and
   1780 // set the inputs Rs and Rt.
   1781 bool BitSimplification::matchPackhl(unsigned SelfR,
   1782       const BitTracker::RegisterCell &RC, BitTracker::RegisterRef &Rs,
   1783       BitTracker::RegisterRef &Rt) {
   1784   RegHalf L1, H1, L2, H2;
   1785 
   1786   if (!matchHalf(SelfR, RC, 0, L2)  || !matchHalf(SelfR, RC, 16, L1))
   1787     return false;
   1788   if (!matchHalf(SelfR, RC, 32, H2) || !matchHalf(SelfR, RC, 48, H1))
   1789     return false;
   1790 
   1791   // Rs = H1.L1, Rt = H2.L2
   1792   if (H1.Reg != L1.Reg || H1.Sub != L1.Sub || H1.Low || !L1.Low)
   1793     return false;
   1794   if (H2.Reg != L2.Reg || H2.Sub != L2.Sub || H2.Low || !L2.Low)
   1795     return false;
   1796 
   1797   Rs = H1;
   1798   Rt = H2;
   1799   return true;
   1800 }
   1801 
   1802 
   1803 unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
   1804   return HLow ? LLow ? Hexagon::A2_combine_ll
   1805                      : Hexagon::A2_combine_lh
   1806               : LLow ? Hexagon::A2_combine_hl
   1807                      : Hexagon::A2_combine_hh;
   1808 }
   1809 
   1810 
   1811 // If MI stores the upper halfword of a register (potentially obtained via
   1812 // shifts or extracts), replace it with a storerf instruction. This could
   1813 // cause the "extraction" code to become dead.
   1814 bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
   1815   unsigned Opc = MI->getOpcode();
   1816   if (Opc != Hexagon::S2_storerh_io)
   1817     return false;
   1818 
   1819   MachineOperand &ValOp = MI->getOperand(2);
   1820   BitTracker::RegisterRef RS = ValOp;
   1821   if (!BT.has(RS.Reg))
   1822     return false;
   1823   const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
   1824   RegHalf H;
   1825   if (!matchHalf(0, RC, 0, H))
   1826     return false;
   1827   if (H.Low)
   1828     return false;
   1829   MI->setDesc(HII.get(Hexagon::S2_storerf_io));
   1830   ValOp.setReg(H.Reg);
   1831   ValOp.setSubReg(H.Sub);
   1832   return true;
   1833 }
   1834 
   1835 
   1836 // If MI stores a value known at compile-time, and the value is within a range
   1837 // that avoids using constant-extenders, replace it with a store-immediate.
   1838 bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
   1839   unsigned Opc = MI->getOpcode();
   1840   unsigned Align = 0;
   1841   switch (Opc) {
   1842     case Hexagon::S2_storeri_io:
   1843       Align++;
   1844     case Hexagon::S2_storerh_io:
   1845       Align++;
   1846     case Hexagon::S2_storerb_io:
   1847       break;
   1848     default:
   1849       return false;
   1850   }
   1851 
   1852   // Avoid stores to frame-indices (due to an unknown offset).
   1853   if (!MI->getOperand(0).isReg())
   1854     return false;
   1855   MachineOperand &OffOp = MI->getOperand(1);
   1856   if (!OffOp.isImm())
   1857     return false;
   1858 
   1859   int64_t Off = OffOp.getImm();
   1860   // Offset is u6:a. Sadly, there is no isShiftedUInt(n,x).
   1861   if (!isUIntN(6+Align, Off) || (Off & ((1<<Align)-1)))
   1862     return false;
   1863   // Source register:
   1864   BitTracker::RegisterRef RS = MI->getOperand(2);
   1865   if (!BT.has(RS.Reg))
   1866     return false;
   1867   const BitTracker::RegisterCell &RC = BT.lookup(RS.Reg);
   1868   uint64_t U;
   1869   if (!HBS::getConst(RC, 0, RC.width(), U))
   1870     return false;
   1871 
   1872   // Only consider 8-bit values to avoid constant-extenders.
   1873   int V;
   1874   switch (Opc) {
   1875     case Hexagon::S2_storerb_io:
   1876       V = int8_t(U);
   1877       break;
   1878     case Hexagon::S2_storerh_io:
   1879       V = int16_t(U);
   1880       break;
   1881     case Hexagon::S2_storeri_io:
   1882       V = int32_t(U);
   1883       break;
   1884   }
   1885   if (!isInt<8>(V))
   1886     return false;
   1887 
   1888   MI->RemoveOperand(2);
   1889   switch (Opc) {
   1890     case Hexagon::S2_storerb_io:
   1891       MI->setDesc(HII.get(Hexagon::S4_storeirb_io));
   1892       break;
   1893     case Hexagon::S2_storerh_io:
   1894       MI->setDesc(HII.get(Hexagon::S4_storeirh_io));
   1895       break;
   1896     case Hexagon::S2_storeri_io:
   1897       MI->setDesc(HII.get(Hexagon::S4_storeiri_io));
   1898       break;
   1899   }
   1900   MI->addOperand(MachineOperand::CreateImm(V));
   1901   return true;
   1902 }
   1903 
   1904 
   1905 // If MI is equivalent o S2_packhl, generate the S2_packhl. MI could be the
   1906 // last instruction in a sequence that results in something equivalent to
   1907 // the pack-halfwords. The intent is to cause the entire sequence to become
   1908 // dead.
   1909 bool BitSimplification::genPackhl(MachineInstr *MI,
   1910       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
   1911   unsigned Opc = MI->getOpcode();
   1912   if (Opc == Hexagon::S2_packhl)
   1913     return false;
   1914   BitTracker::RegisterRef Rs, Rt;
   1915   if (!matchPackhl(RD.Reg, RC, Rs, Rt))
   1916     return false;
   1917 
   1918   MachineBasicBlock &B = *MI->getParent();
   1919   unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
   1920   DebugLoc DL = MI->getDebugLoc();
   1921   auto At = MI->isPHI() ? B.getFirstNonPHI()
   1922                         : MachineBasicBlock::iterator(MI);
   1923   BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
   1924       .addReg(Rs.Reg, 0, Rs.Sub)
   1925       .addReg(Rt.Reg, 0, Rt.Sub);
   1926   HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
   1927   BT.put(BitTracker::RegisterRef(NewR), RC);
   1928   return true;
   1929 }
   1930 
   1931 
   1932 // If MI produces halfword of the input in the low half of the output,
   1933 // replace it with zero-extend or extractu.
   1934 bool BitSimplification::genExtractHalf(MachineInstr *MI,
   1935       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
   1936   RegHalf L;
   1937   // Check for halfword in low 16 bits, zeros elsewhere.
   1938   if (!matchHalf(RD.Reg, RC, 0, L) || !HBS::isZero(RC, 16, 16))
   1939     return false;
   1940 
   1941   unsigned Opc = MI->getOpcode();
   1942   MachineBasicBlock &B = *MI->getParent();
   1943   DebugLoc DL = MI->getDebugLoc();
   1944 
   1945   // Prefer zxth, since zxth can go in any slot, while extractu only in
   1946   // slots 2 and 3.
   1947   unsigned NewR = 0;
   1948   auto At = MI->isPHI() ? B.getFirstNonPHI()
   1949                         : MachineBasicBlock::iterator(MI);
   1950   if (L.Low && Opc != Hexagon::A2_zxth) {
   1951     NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
   1952     BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
   1953         .addReg(L.Reg, 0, L.Sub);
   1954   } else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
   1955     NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
   1956     BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
   1957         .addReg(L.Reg, 0, L.Sub)
   1958         .addImm(16);
   1959   }
   1960   if (NewR == 0)
   1961     return false;
   1962   HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
   1963   BT.put(BitTracker::RegisterRef(NewR), RC);
   1964   return true;
   1965 }
   1966 
   1967 
   1968 // If MI is equivalent to a combine(.L/.H, .L/.H) replace with with the
   1969 // combine.
   1970 bool BitSimplification::genCombineHalf(MachineInstr *MI,
   1971       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
   1972   RegHalf L, H;
   1973   // Check for combine h/l
   1974   if (!matchHalf(RD.Reg, RC, 0, L) || !matchHalf(RD.Reg, RC, 16, H))
   1975     return false;
   1976   // Do nothing if this is just a reg copy.
   1977   if (L.Reg == H.Reg && L.Sub == H.Sub && !H.Low && L.Low)
   1978     return false;
   1979 
   1980   unsigned Opc = MI->getOpcode();
   1981   unsigned COpc = getCombineOpcode(H.Low, L.Low);
   1982   if (COpc == Opc)
   1983     return false;
   1984 
   1985   MachineBasicBlock &B = *MI->getParent();
   1986   DebugLoc DL = MI->getDebugLoc();
   1987   unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
   1988   auto At = MI->isPHI() ? B.getFirstNonPHI()
   1989                         : MachineBasicBlock::iterator(MI);
   1990   BuildMI(B, At, DL, HII.get(COpc), NewR)
   1991       .addReg(H.Reg, 0, H.Sub)
   1992       .addReg(L.Reg, 0, L.Sub);
   1993   HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
   1994   BT.put(BitTracker::RegisterRef(NewR), RC);
   1995   return true;
   1996 }
   1997 
   1998 
   1999 // If MI resets high bits of a register and keeps the lower ones, replace it
   2000 // with zero-extend byte/half, and-immediate, or extractu, as appropriate.
   2001 bool BitSimplification::genExtractLow(MachineInstr *MI,
   2002       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
   2003   unsigned Opc = MI->getOpcode();
   2004   switch (Opc) {
   2005     case Hexagon::A2_zxtb:
   2006     case Hexagon::A2_zxth:
   2007     case Hexagon::S2_extractu:
   2008       return false;
   2009   }
   2010   if (Opc == Hexagon::A2_andir && MI->getOperand(2).isImm()) {
   2011     int32_t Imm = MI->getOperand(2).getImm();
   2012     if (isInt<10>(Imm))
   2013       return false;
   2014   }
   2015 
   2016   if (MI->hasUnmodeledSideEffects() || MI->isInlineAsm())
   2017     return false;
   2018   unsigned W = RC.width();
   2019   while (W > 0 && RC[W-1].is(0))
   2020     W--;
   2021   if (W == 0 || W == RC.width())
   2022     return false;
   2023   unsigned NewOpc = (W == 8)  ? Hexagon::A2_zxtb
   2024                   : (W == 16) ? Hexagon::A2_zxth
   2025                   : (W < 10)  ? Hexagon::A2_andir
   2026                   : Hexagon::S2_extractu;
   2027   MachineBasicBlock &B = *MI->getParent();
   2028   DebugLoc DL = MI->getDebugLoc();
   2029 
   2030   for (auto &Op : MI->uses()) {
   2031     if (!Op.isReg())
   2032       continue;
   2033     BitTracker::RegisterRef RS = Op;
   2034     if (!BT.has(RS.Reg))
   2035       continue;
   2036     const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
   2037     unsigned BN, BW;
   2038     if (!HBS::getSubregMask(RS, BN, BW, MRI))
   2039       continue;
   2040     if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
   2041       continue;
   2042 
   2043     unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
   2044     auto At = MI->isPHI() ? B.getFirstNonPHI()
   2045                           : MachineBasicBlock::iterator(MI);
   2046     auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
   2047                   .addReg(RS.Reg, 0, RS.Sub);
   2048     if (NewOpc == Hexagon::A2_andir)
   2049       MIB.addImm((1 << W) - 1);
   2050     else if (NewOpc == Hexagon::S2_extractu)
   2051       MIB.addImm(W).addImm(0);
   2052     HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
   2053     BT.put(BitTracker::RegisterRef(NewR), RC);
   2054     return true;
   2055   }
   2056   return false;
   2057 }
   2058 
   2059 
   2060 // Check for tstbit simplification opportunity, where the bit being checked
   2061 // can be tracked back to another register. For example:
   2062 //   vreg2 = S2_lsr_i_r  vreg1, 5
   2063 //   vreg3 = S2_tstbit_i vreg2, 0
   2064 // =>
   2065 //   vreg3 = S2_tstbit_i vreg1, 5
   2066 bool BitSimplification::simplifyTstbit(MachineInstr *MI,
   2067       BitTracker::RegisterRef RD, const BitTracker::RegisterCell &RC) {
   2068   unsigned Opc = MI->getOpcode();
   2069   if (Opc != Hexagon::S2_tstbit_i)
   2070     return false;
   2071 
   2072   unsigned BN = MI->getOperand(2).getImm();
   2073   BitTracker::RegisterRef RS = MI->getOperand(1);
   2074   unsigned F, W;
   2075   DebugLoc DL = MI->getDebugLoc();
   2076   if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
   2077     return false;
   2078   MachineBasicBlock &B = *MI->getParent();
   2079   auto At = MI->isPHI() ? B.getFirstNonPHI()
   2080                         : MachineBasicBlock::iterator(MI);
   2081 
   2082   const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
   2083   const BitTracker::BitValue &V = SC[F+BN];
   2084   if (V.Type == BitTracker::BitValue::Ref && V.RefI.Reg != RS.Reg) {
   2085     const TargetRegisterClass *TC = MRI.getRegClass(V.RefI.Reg);
   2086     // Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
   2087     // a double register, need to use a subregister and adjust bit
   2088     // number.
   2089     unsigned P = UINT_MAX;
   2090     BitTracker::RegisterRef RR(V.RefI.Reg, 0);
   2091     if (TC == &Hexagon::DoubleRegsRegClass) {
   2092       P = V.RefI.Pos;
   2093       RR.Sub = Hexagon::subreg_loreg;
   2094       if (P >= 32) {
   2095         P -= 32;
   2096         RR.Sub = Hexagon::subreg_hireg;
   2097       }
   2098     } else if (TC == &Hexagon::IntRegsRegClass) {
   2099       P = V.RefI.Pos;
   2100     }
   2101     if (P != UINT_MAX) {
   2102       unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
   2103       BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
   2104           .addReg(RR.Reg, 0, RR.Sub)
   2105           .addImm(P);
   2106       HBS::replaceReg(RD.Reg, NewR, MRI);
   2107       BT.put(NewR, RC);
   2108       return true;
   2109     }
   2110   } else if (V.is(0) || V.is(1)) {
   2111     unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
   2112     unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
   2113     BuildMI(B, At, DL, HII.get(NewOpc), NewR);
   2114     HBS::replaceReg(RD.Reg, NewR, MRI);
   2115     return true;
   2116   }
   2117 
   2118   return false;
   2119 }
   2120 
   2121 
   2122 bool BitSimplification::processBlock(MachineBasicBlock &B,
   2123       const RegisterSet &AVs) {
   2124   bool Changed = false;
   2125   RegisterSet AVB = AVs;
   2126   RegisterSet Defs;
   2127 
   2128   for (auto I = B.begin(), E = B.end(); I != E; ++I, AVB.insert(Defs)) {
   2129     MachineInstr *MI = &*I;
   2130     Defs.clear();
   2131     HBS::getInstrDefs(*MI, Defs);
   2132 
   2133     unsigned Opc = MI->getOpcode();
   2134     if (Opc == TargetOpcode::COPY || Opc == TargetOpcode::REG_SEQUENCE)
   2135       continue;
   2136 
   2137     if (MI->mayStore()) {
   2138       bool T = genStoreUpperHalf(MI);
   2139       T = T || genStoreImmediate(MI);
   2140       Changed |= T;
   2141       continue;
   2142     }
   2143 
   2144     if (Defs.count() != 1)
   2145       continue;
   2146     const MachineOperand &Op0 = MI->getOperand(0);
   2147     if (!Op0.isReg() || !Op0.isDef())
   2148       continue;
   2149     BitTracker::RegisterRef RD = Op0;
   2150     if (!BT.has(RD.Reg))
   2151       continue;
   2152     const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
   2153     const BitTracker::RegisterCell &RC = BT.lookup(RD.Reg);
   2154 
   2155     if (FRC->getID() == Hexagon::DoubleRegsRegClassID) {
   2156       bool T = genPackhl(MI, RD, RC);
   2157       Changed |= T;
   2158       continue;
   2159     }
   2160 
   2161     if (FRC->getID() == Hexagon::IntRegsRegClassID) {
   2162       bool T = genExtractHalf(MI, RD, RC);
   2163       T = T || genCombineHalf(MI, RD, RC);
   2164       T = T || genExtractLow(MI, RD, RC);
   2165       Changed |= T;
   2166       continue;
   2167     }
   2168 
   2169     if (FRC->getID() == Hexagon::PredRegsRegClassID) {
   2170       bool T = simplifyTstbit(MI, RD, RC);
   2171       Changed |= T;
   2172       continue;
   2173     }
   2174   }
   2175   return Changed;
   2176 }
   2177 
   2178 
   2179 bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
   2180   if (skipFunction(*MF.getFunction()))
   2181     return false;
   2182 
   2183   auto &HST = MF.getSubtarget<HexagonSubtarget>();
   2184   auto &HRI = *HST.getRegisterInfo();
   2185   auto &HII = *HST.getInstrInfo();
   2186 
   2187   MDT = &getAnalysis<MachineDominatorTree>();
   2188   MachineRegisterInfo &MRI = MF.getRegInfo();
   2189   bool Changed;
   2190 
   2191   Changed = DeadCodeElimination(MF, *MDT).run();
   2192 
   2193   const HexagonEvaluator HE(HRI, MRI, HII, MF);
   2194   BitTracker BT(HE, MF);
   2195   DEBUG(BT.trace(true));
   2196   BT.run();
   2197 
   2198   MachineBasicBlock &Entry = MF.front();
   2199 
   2200   RegisterSet AIG;  // Available registers for IG.
   2201   ConstGeneration ImmG(BT, HII, MRI);
   2202   Changed |= visitBlock(Entry, ImmG, AIG);
   2203 
   2204   RegisterSet ARE;  // Available registers for RIE.
   2205   RedundantInstrElimination RIE(BT, HII, MRI);
   2206   Changed |= visitBlock(Entry, RIE, ARE);
   2207 
   2208   RegisterSet ACG;  // Available registers for CG.
   2209   CopyGeneration CopyG(BT, HII, MRI);
   2210   Changed |= visitBlock(Entry, CopyG, ACG);
   2211 
   2212   RegisterSet ACP;  // Available registers for CP.
   2213   CopyPropagation CopyP(HRI, MRI);
   2214   Changed |= visitBlock(Entry, CopyP, ACP);
   2215 
   2216   Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
   2217 
   2218   BT.run();
   2219   RegisterSet ABS;  // Available registers for BS.
   2220   BitSimplification BitS(BT, HII, MRI);
   2221   Changed |= visitBlock(Entry, BitS, ABS);
   2222 
   2223   Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
   2224 
   2225   if (Changed) {
   2226     for (auto &B : MF)
   2227       for (auto &I : B)
   2228         I.clearKillInfo();
   2229     DeadCodeElimination(MF, *MDT).run();
   2230   }
   2231   return Changed;
   2232 }
   2233 
   2234 
   2235 // Recognize loops where the code at the end of the loop matches the code
   2236 // before the entry of the loop, and the matching code is such that is can
   2237 // be simplified. This pass relies on the bit simplification above and only
   2238 // prepares code in a way that can be handled by the bit simplifcation.
   2239 //
   2240 // This is the motivating testcase (and explanation):
   2241 //
   2242 // {
   2243 //   loop0(.LBB0_2, r1)      // %for.body.preheader
   2244 //   r5:4 = memd(r0++#8)
   2245 // }
   2246 // {
   2247 //   r3 = lsr(r4, #16)
   2248 //   r7:6 = combine(r5, r5)
   2249 // }
   2250 // {
   2251 //   r3 = insert(r5, #16, #16)
   2252 //   r7:6 = vlsrw(r7:6, #16)
   2253 // }
   2254 // .LBB0_2:
   2255 // {
   2256 //   memh(r2+#4) = r5
   2257 //   memh(r2+#6) = r6            # R6 is really R5.H
   2258 // }
   2259 // {
   2260 //   r2 = add(r2, #8)
   2261 //   memh(r2+#0) = r4
   2262 //   memh(r2+#2) = r3            # R3 is really R4.H
   2263 // }
   2264 // {
   2265 //   r5:4 = memd(r0++#8)
   2266 // }
   2267 // {                             # "Shuffling" code that sets up R3 and R6
   2268 //   r3 = lsr(r4, #16)           # so that their halves can be stored in the
   2269 //   r7:6 = combine(r5, r5)      # next iteration. This could be folded into
   2270 // }                             # the stores if the code was at the beginning
   2271 // {                             # of the loop iteration. Since the same code
   2272 //   r3 = insert(r5, #16, #16)   # precedes the loop, it can actually be moved
   2273 //   r7:6 = vlsrw(r7:6, #16)     # there.
   2274 // }:endloop0
   2275 //
   2276 //
   2277 // The outcome:
   2278 //
   2279 // {
   2280 //   loop0(.LBB0_2, r1)
   2281 //   r5:4 = memd(r0++#8)
   2282 // }
   2283 // .LBB0_2:
   2284 // {
   2285 //   memh(r2+#4) = r5
   2286 //   memh(r2+#6) = r5.h
   2287 // }
   2288 // {
   2289 //   r2 = add(r2, #8)
   2290 //   memh(r2+#0) = r4
   2291 //   memh(r2+#2) = r4.h
   2292 // }
   2293 // {
   2294 //   r5:4 = memd(r0++#8)
   2295 // }:endloop0
   2296 
   2297 namespace llvm {
   2298   FunctionPass *createHexagonLoopRescheduling();
   2299   void initializeHexagonLoopReschedulingPass(PassRegistry&);
   2300 }
   2301 
   2302 namespace {
   2303   class HexagonLoopRescheduling : public MachineFunctionPass {
   2304   public:
   2305     static char ID;
   2306     HexagonLoopRescheduling() : MachineFunctionPass(ID),
   2307         HII(0), HRI(0), MRI(0), BTP(0) {
   2308       initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
   2309     }
   2310 
   2311     bool runOnMachineFunction(MachineFunction &MF) override;
   2312 
   2313   private:
   2314     const HexagonInstrInfo *HII;
   2315     const HexagonRegisterInfo *HRI;
   2316     MachineRegisterInfo *MRI;
   2317     BitTracker *BTP;
   2318 
   2319     struct LoopCand {
   2320       LoopCand(MachineBasicBlock *lb, MachineBasicBlock *pb,
   2321             MachineBasicBlock *eb) : LB(lb), PB(pb), EB(eb) {}
   2322       MachineBasicBlock *LB, *PB, *EB;
   2323     };
   2324     typedef std::vector<MachineInstr*> InstrList;
   2325     struct InstrGroup {
   2326       BitTracker::RegisterRef Inp, Out;
   2327       InstrList Ins;
   2328     };
   2329     struct PhiInfo {
   2330       PhiInfo(MachineInstr &P, MachineBasicBlock &B);
   2331       unsigned DefR;
   2332       BitTracker::RegisterRef LR, PR;
   2333       MachineBasicBlock *LB, *PB;
   2334     };
   2335 
   2336     static unsigned getDefReg(const MachineInstr *MI);
   2337     bool isConst(unsigned Reg) const;
   2338     bool isBitShuffle(const MachineInstr *MI, unsigned DefR) const;
   2339     bool isStoreInput(const MachineInstr *MI, unsigned DefR) const;
   2340     bool isShuffleOf(unsigned OutR, unsigned InpR) const;
   2341     bool isSameShuffle(unsigned OutR1, unsigned InpR1, unsigned OutR2,
   2342         unsigned &InpR2) const;
   2343     void moveGroup(InstrGroup &G, MachineBasicBlock &LB, MachineBasicBlock &PB,
   2344         MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
   2345     bool processLoop(LoopCand &C);
   2346   };
   2347 }
   2348 
   2349 char HexagonLoopRescheduling::ID = 0;
   2350 
   2351 INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
   2352   "Hexagon Loop Rescheduling", false, false)
   2353 
   2354 
   2355 HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
   2356       MachineBasicBlock &B) {
   2357   DefR = HexagonLoopRescheduling::getDefReg(&P);
   2358   LB = &B;
   2359   PB = nullptr;
   2360   for (unsigned i = 1, n = P.getNumOperands(); i < n; i += 2) {
   2361     const MachineOperand &OpB = P.getOperand(i+1);
   2362     if (OpB.getMBB() == &B) {
   2363       LR = P.getOperand(i);
   2364       continue;
   2365     }
   2366     PB = OpB.getMBB();
   2367     PR = P.getOperand(i);
   2368   }
   2369 }
   2370 
   2371 
   2372 unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
   2373   RegisterSet Defs;
   2374   HBS::getInstrDefs(*MI, Defs);
   2375   if (Defs.count() != 1)
   2376     return 0;
   2377   return Defs.find_first();
   2378 }
   2379 
   2380 
   2381 bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
   2382   if (!BTP->has(Reg))
   2383     return false;
   2384   const BitTracker::RegisterCell &RC = BTP->lookup(Reg);
   2385   for (unsigned i = 0, w = RC.width(); i < w; ++i) {
   2386     const BitTracker::BitValue &V = RC[i];
   2387     if (!V.is(0) && !V.is(1))
   2388       return false;
   2389   }
   2390   return true;
   2391 }
   2392 
   2393 
   2394 bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
   2395       unsigned DefR) const {
   2396   unsigned Opc = MI->getOpcode();
   2397   switch (Opc) {
   2398     case TargetOpcode::COPY:
   2399     case Hexagon::S2_lsr_i_r:
   2400     case Hexagon::S2_asr_i_r:
   2401     case Hexagon::S2_asl_i_r:
   2402     case Hexagon::S2_lsr_i_p:
   2403     case Hexagon::S2_asr_i_p:
   2404     case Hexagon::S2_asl_i_p:
   2405     case Hexagon::S2_insert:
   2406     case Hexagon::A2_or:
   2407     case Hexagon::A2_orp:
   2408     case Hexagon::A2_and:
   2409     case Hexagon::A2_andp:
   2410     case Hexagon::A2_combinew:
   2411     case Hexagon::A4_combineri:
   2412     case Hexagon::A4_combineir:
   2413     case Hexagon::A2_combineii:
   2414     case Hexagon::A4_combineii:
   2415     case Hexagon::A2_combine_ll:
   2416     case Hexagon::A2_combine_lh:
   2417     case Hexagon::A2_combine_hl:
   2418     case Hexagon::A2_combine_hh:
   2419       return true;
   2420   }
   2421   return false;
   2422 }
   2423 
   2424 
   2425 bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
   2426       unsigned InpR) const {
   2427   for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
   2428     const MachineOperand &Op = MI->getOperand(i);
   2429     if (!Op.isReg())
   2430       continue;
   2431     if (Op.getReg() == InpR)
   2432       return i == n-1;
   2433   }
   2434   return false;
   2435 }
   2436 
   2437 
   2438 bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
   2439   if (!BTP->has(OutR) || !BTP->has(InpR))
   2440     return false;
   2441   const BitTracker::RegisterCell &OutC = BTP->lookup(OutR);
   2442   for (unsigned i = 0, w = OutC.width(); i < w; ++i) {
   2443     const BitTracker::BitValue &V = OutC[i];
   2444     if (V.Type != BitTracker::BitValue::Ref)
   2445       continue;
   2446     if (V.RefI.Reg != InpR)
   2447       return false;
   2448   }
   2449   return true;
   2450 }
   2451 
   2452 
   2453 bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
   2454       unsigned OutR2, unsigned &InpR2) const {
   2455   if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
   2456     return false;
   2457   const BitTracker::RegisterCell &OutC1 = BTP->lookup(OutR1);
   2458   const BitTracker::RegisterCell &OutC2 = BTP->lookup(OutR2);
   2459   unsigned W = OutC1.width();
   2460   unsigned MatchR = 0;
   2461   if (W != OutC2.width())
   2462     return false;
   2463   for (unsigned i = 0; i < W; ++i) {
   2464     const BitTracker::BitValue &V1 = OutC1[i], &V2 = OutC2[i];
   2465     if (V1.Type != V2.Type || V1.Type == BitTracker::BitValue::One)
   2466       return false;
   2467     if (V1.Type != BitTracker::BitValue::Ref)
   2468       continue;
   2469     if (V1.RefI.Pos != V2.RefI.Pos)
   2470       return false;
   2471     if (V1.RefI.Reg != InpR1)
   2472       return false;
   2473     if (V2.RefI.Reg == 0 || V2.RefI.Reg == OutR2)
   2474       return false;
   2475     if (!MatchR)
   2476       MatchR = V2.RefI.Reg;
   2477     else if (V2.RefI.Reg != MatchR)
   2478       return false;
   2479   }
   2480   InpR2 = MatchR;
   2481   return true;
   2482 }
   2483 
   2484 
   2485 void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
   2486       MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
   2487       unsigned NewPredR) {
   2488   DenseMap<unsigned,unsigned> RegMap;
   2489 
   2490   const TargetRegisterClass *PhiRC = MRI->getRegClass(NewPredR);
   2491   unsigned PhiR = MRI->createVirtualRegister(PhiRC);
   2492   BuildMI(LB, At, At->getDebugLoc(), HII->get(TargetOpcode::PHI), PhiR)
   2493     .addReg(NewPredR)
   2494     .addMBB(&PB)
   2495     .addReg(G.Inp.Reg)
   2496     .addMBB(&LB);
   2497   RegMap.insert(std::make_pair(G.Inp.Reg, PhiR));
   2498 
   2499   for (unsigned i = G.Ins.size(); i > 0; --i) {
   2500     const MachineInstr *SI = G.Ins[i-1];
   2501     unsigned DR = getDefReg(SI);
   2502     const TargetRegisterClass *RC = MRI->getRegClass(DR);
   2503     unsigned NewDR = MRI->createVirtualRegister(RC);
   2504     DebugLoc DL = SI->getDebugLoc();
   2505 
   2506     auto MIB = BuildMI(LB, At, DL, HII->get(SI->getOpcode()), NewDR);
   2507     for (unsigned j = 0, m = SI->getNumOperands(); j < m; ++j) {
   2508       const MachineOperand &Op = SI->getOperand(j);
   2509       if (!Op.isReg()) {
   2510         MIB.addOperand(Op);
   2511         continue;
   2512       }
   2513       if (!Op.isUse())
   2514         continue;
   2515       unsigned UseR = RegMap[Op.getReg()];
   2516       MIB.addReg(UseR, 0, Op.getSubReg());
   2517     }
   2518     RegMap.insert(std::make_pair(DR, NewDR));
   2519   }
   2520 
   2521   HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
   2522 }
   2523 
   2524 
   2525 bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
   2526   DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
   2527   std::vector<PhiInfo> Phis;
   2528   for (auto &I : *C.LB) {
   2529     if (!I.isPHI())
   2530       break;
   2531     unsigned PR = getDefReg(&I);
   2532     if (isConst(PR))
   2533       continue;
   2534     bool BadUse = false, GoodUse = false;
   2535     for (auto UI = MRI->use_begin(PR), UE = MRI->use_end(); UI != UE; ++UI) {
   2536       MachineInstr *UseI = UI->getParent();
   2537       if (UseI->getParent() != C.LB) {
   2538         BadUse = true;
   2539         break;
   2540       }
   2541       if (isBitShuffle(UseI, PR) || isStoreInput(UseI, PR))
   2542         GoodUse = true;
   2543     }
   2544     if (BadUse || !GoodUse)
   2545       continue;
   2546 
   2547     Phis.push_back(PhiInfo(I, *C.LB));
   2548   }
   2549 
   2550   DEBUG({
   2551     dbgs() << "Phis: {";
   2552     for (auto &I : Phis) {
   2553       dbgs() << ' ' << PrintReg(I.DefR, HRI) << "=phi("
   2554              << PrintReg(I.PR.Reg, HRI, I.PR.Sub) << ":b" << I.PB->getNumber()
   2555              << ',' << PrintReg(I.LR.Reg, HRI, I.LR.Sub) << ":b"
   2556              << I.LB->getNumber() << ')';
   2557     }
   2558     dbgs() << " }\n";
   2559   });
   2560 
   2561   if (Phis.empty())
   2562     return false;
   2563 
   2564   bool Changed = false;
   2565   InstrList ShufIns;
   2566 
   2567   // Go backwards in the block: for each bit shuffling instruction, check
   2568   // if that instruction could potentially be moved to the front of the loop:
   2569   // the output of the loop cannot be used in a non-shuffling instruction
   2570   // in this loop.
   2571   for (auto I = C.LB->rbegin(), E = C.LB->rend(); I != E; ++I) {
   2572     if (I->isTerminator())
   2573       continue;
   2574     if (I->isPHI())
   2575       break;
   2576 
   2577     RegisterSet Defs;
   2578     HBS::getInstrDefs(*I, Defs);
   2579     if (Defs.count() != 1)
   2580       continue;
   2581     unsigned DefR = Defs.find_first();
   2582     if (!TargetRegisterInfo::isVirtualRegister(DefR))
   2583       continue;
   2584     if (!isBitShuffle(&*I, DefR))
   2585       continue;
   2586 
   2587     bool BadUse = false;
   2588     for (auto UI = MRI->use_begin(DefR), UE = MRI->use_end(); UI != UE; ++UI) {
   2589       MachineInstr *UseI = UI->getParent();
   2590       if (UseI->getParent() == C.LB) {
   2591         if (UseI->isPHI()) {
   2592           // If the use is in a phi node in this loop, then it should be
   2593           // the value corresponding to the back edge.
   2594           unsigned Idx = UI.getOperandNo();
   2595           if (UseI->getOperand(Idx+1).getMBB() != C.LB)
   2596             BadUse = true;
   2597         } else {
   2598           auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI);
   2599           if (F == ShufIns.end())
   2600             BadUse = true;
   2601         }
   2602       } else {
   2603         // There is a use outside of the loop, but there is no epilog block
   2604         // suitable for a copy-out.
   2605         if (C.EB == nullptr)
   2606           BadUse = true;
   2607       }
   2608       if (BadUse)
   2609         break;
   2610     }
   2611 
   2612     if (BadUse)
   2613       continue;
   2614     ShufIns.push_back(&*I);
   2615   }
   2616 
   2617   // Partition the list of shuffling instructions into instruction groups,
   2618   // where each group has to be moved as a whole (i.e. a group is a chain of
   2619   // dependent instructions). A group produces a single live output register,
   2620   // which is meant to be the input of the loop phi node (although this is
   2621   // not checked here yet). It also uses a single register as its input,
   2622   // which is some value produced in the loop body. After moving the group
   2623   // to the beginning of the loop, that input register would need to be
   2624   // the loop-carried register (through a phi node) instead of the (currently
   2625   // loop-carried) output register.
   2626   typedef std::vector<InstrGroup> InstrGroupList;
   2627   InstrGroupList Groups;
   2628 
   2629   for (unsigned i = 0, n = ShufIns.size(); i < n; ++i) {
   2630     MachineInstr *SI = ShufIns[i];
   2631     if (SI == nullptr)
   2632       continue;
   2633 
   2634     InstrGroup G;
   2635     G.Ins.push_back(SI);
   2636     G.Out.Reg = getDefReg(SI);
   2637     RegisterSet Inputs;
   2638     HBS::getInstrUses(*SI, Inputs);
   2639 
   2640     for (unsigned j = i+1; j < n; ++j) {
   2641       MachineInstr *MI = ShufIns[j];
   2642       if (MI == nullptr)
   2643         continue;
   2644       RegisterSet Defs;
   2645       HBS::getInstrDefs(*MI, Defs);
   2646       // If this instruction does not define any pending inputs, skip it.
   2647       if (!Defs.intersects(Inputs))
   2648         continue;
   2649       // Otherwise, add it to the current group and remove the inputs that
   2650       // are defined by MI.
   2651       G.Ins.push_back(MI);
   2652       Inputs.remove(Defs);
   2653       // Then add all registers used by MI.
   2654       HBS::getInstrUses(*MI, Inputs);
   2655       ShufIns[j] = nullptr;
   2656     }
   2657 
   2658     // Only add a group if it requires at most one register.
   2659     if (Inputs.count() > 1)
   2660       continue;
   2661     auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
   2662       return G.Out.Reg == P.LR.Reg;
   2663     };
   2664     if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
   2665       continue;
   2666 
   2667     G.Inp.Reg = Inputs.find_first();
   2668     Groups.push_back(G);
   2669   }
   2670 
   2671   DEBUG({
   2672     for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
   2673       InstrGroup &G = Groups[i];
   2674       dbgs() << "Group[" << i << "] inp: "
   2675              << PrintReg(G.Inp.Reg, HRI, G.Inp.Sub)
   2676              << "  out: " << PrintReg(G.Out.Reg, HRI, G.Out.Sub) << "\n";
   2677       for (unsigned j = 0, m = G.Ins.size(); j < m; ++j)
   2678         dbgs() << "  " << *G.Ins[j];
   2679     }
   2680   });
   2681 
   2682   for (unsigned i = 0, n = Groups.size(); i < n; ++i) {
   2683     InstrGroup &G = Groups[i];
   2684     if (!isShuffleOf(G.Out.Reg, G.Inp.Reg))
   2685       continue;
   2686     auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
   2687       return G.Out.Reg == P.LR.Reg;
   2688     };
   2689     auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
   2690     if (F == Phis.end())
   2691       continue;
   2692     unsigned PredR = 0;
   2693     if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
   2694       const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
   2695       unsigned Opc = DefPredR->getOpcode();
   2696       if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
   2697         continue;
   2698       if (!DefPredR->getOperand(1).isImm())
   2699         continue;
   2700       if (DefPredR->getOperand(1).getImm() != 0)
   2701         continue;
   2702       const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
   2703       if (RC != MRI->getRegClass(F->PR.Reg)) {
   2704         PredR = MRI->createVirtualRegister(RC);
   2705         unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
   2706                                                           : Hexagon::A2_tfrpi;
   2707         auto T = C.PB->getFirstTerminator();
   2708         DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
   2709         BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
   2710           .addImm(0);
   2711       } else {
   2712         PredR = F->PR.Reg;
   2713       }
   2714     }
   2715     assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
   2716     moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
   2717     Changed = true;
   2718   }
   2719 
   2720   return Changed;
   2721 }
   2722 
   2723 
   2724 bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
   2725   if (skipFunction(*MF.getFunction()))
   2726     return false;
   2727 
   2728   auto &HST = MF.getSubtarget<HexagonSubtarget>();
   2729   HII = HST.getInstrInfo();
   2730   HRI = HST.getRegisterInfo();
   2731   MRI = &MF.getRegInfo();
   2732   const HexagonEvaluator HE(*HRI, *MRI, *HII, MF);
   2733   BitTracker BT(HE, MF);
   2734   DEBUG(BT.trace(true));
   2735   BT.run();
   2736   BTP = &BT;
   2737 
   2738   std::vector<LoopCand> Cand;
   2739 
   2740   for (auto &B : MF) {
   2741     if (B.pred_size() != 2 || B.succ_size() != 2)
   2742       continue;
   2743     MachineBasicBlock *PB = nullptr;
   2744     bool IsLoop = false;
   2745     for (auto PI = B.pred_begin(), PE = B.pred_end(); PI != PE; ++PI) {
   2746       if (*PI != &B)
   2747         PB = *PI;
   2748       else
   2749         IsLoop = true;
   2750     }
   2751     if (!IsLoop)
   2752       continue;
   2753 
   2754     MachineBasicBlock *EB = nullptr;
   2755     for (auto SI = B.succ_begin(), SE = B.succ_end(); SI != SE; ++SI) {
   2756       if (*SI == &B)
   2757         continue;
   2758       // Set EP to the epilog block, if it has only 1 predecessor (i.e. the
   2759       // edge from B to EP is non-critical.
   2760       if ((*SI)->pred_size() == 1)
   2761         EB = *SI;
   2762       break;
   2763     }
   2764 
   2765     Cand.push_back(LoopCand(&B, PB, EB));
   2766   }
   2767 
   2768   bool Changed = false;
   2769   for (auto &C : Cand)
   2770     Changed |= processLoop(C);
   2771 
   2772   return Changed;
   2773 }
   2774 
   2775 //===----------------------------------------------------------------------===//
   2776 //                         Public Constructor Functions
   2777 //===----------------------------------------------------------------------===//
   2778 
   2779 FunctionPass *llvm::createHexagonLoopRescheduling() {
   2780   return new HexagonLoopRescheduling();
   2781 }
   2782 
   2783 FunctionPass *llvm::createHexagonBitSimplify() {
   2784   return new HexagonBitSimplify();
   2785 }
   2786 
   2787