Home | History | Annotate | Download | only in Hexagon
      1 //===- HexagonConstExtenders.cpp ------------------------------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "HexagonInstrInfo.h"
     11 #include "HexagonRegisterInfo.h"
     12 #include "HexagonSubtarget.h"
     13 #include "llvm/ADT/SmallVector.h"
     14 #include "llvm/CodeGen/MachineDominators.h"
     15 #include "llvm/CodeGen/MachineFunctionPass.h"
     16 #include "llvm/CodeGen/MachineInstrBuilder.h"
     17 #include "llvm/CodeGen/MachineRegisterInfo.h"
     18 #include "llvm/Support/CommandLine.h"
     19 #include "llvm/Support/raw_ostream.h"
     20 #include "llvm/Pass.h"
     21 #include <map>
     22 #include <set>
     23 #include <utility>
     24 #include <vector>
     25 
     26 #define DEBUG_TYPE "hexagon-cext-opt"
     27 
     28 using namespace llvm;
     29 
     30 static cl::opt<unsigned> CountThreshold("hexagon-cext-threshold",
     31   cl::init(3), cl::Hidden, cl::ZeroOrMore,
     32   cl::desc("Minimum number of extenders to trigger replacement"));
     33 
     34 static cl::opt<unsigned> ReplaceLimit("hexagon-cext-limit", cl::init(0),
     35   cl::Hidden, cl::ZeroOrMore, cl::desc("Maximum number of replacements"));
     36 
     37 namespace llvm {
     38   void initializeHexagonConstExtendersPass(PassRegistry&);
     39   FunctionPass *createHexagonConstExtenders();
     40 }
     41 
     42 static int32_t adjustUp(int32_t V, uint8_t A, uint8_t O) {
     43   assert(isPowerOf2_32(A));
     44   int32_t U = (V & -A) + O;
     45   return U >= V ? U : U+A;
     46 }
     47 
     48 static int32_t adjustDown(int32_t V, uint8_t A, uint8_t O) {
     49   assert(isPowerOf2_32(A));
     50   int32_t U = (V & -A) + O;
     51   return U <= V ? U : U-A;
     52 }
     53 
     54 namespace {
     55   struct OffsetRange {
     56     // The range of values between Min and Max that are of form Align*N+Offset,
     57     // for some integer N. Min and Max are required to be of that form as well,
     58     // except in the case of an empty range.
     59     int32_t Min = INT_MIN, Max = INT_MAX;
     60     uint8_t Align = 1;
     61     uint8_t Offset = 0;
     62 
     63     OffsetRange() = default;
     64     OffsetRange(int32_t L, int32_t H, uint8_t A, uint8_t O = 0)
     65       : Min(L), Max(H), Align(A), Offset(O) {}
     66     OffsetRange &intersect(OffsetRange A) {
     67       if (Align < A.Align)
     68         std::swap(*this, A);
     69 
     70       // Align >= A.Align.
     71       if (Offset >= A.Offset && (Offset - A.Offset) % A.Align == 0) {
     72         Min = adjustUp(std::max(Min, A.Min), Align, Offset);
     73         Max = adjustDown(std::min(Max, A.Max), Align, Offset);
     74       } else {
     75         // Make an empty range.
     76         Min = 0;
     77         Max = -1;
     78       }
     79       // Canonicalize empty ranges.
     80       if (Min > Max)
     81         std::tie(Min, Max, Align) = std::make_tuple(0, -1, 1);
     82       return *this;
     83     }
     84     OffsetRange &shift(int32_t S) {
     85       Min += S;
     86       Max += S;
     87       Offset = (Offset+S) % Align;
     88       return *this;
     89     }
     90     OffsetRange &extendBy(int32_t D) {
     91       // If D < 0, extend Min, otherwise extend Max.
     92       assert(D % Align == 0);
     93       if (D < 0)
     94         Min = (INT_MIN-D < Min) ? Min+D : INT_MIN;
     95       else
     96         Max = (INT_MAX-D > Max) ? Max+D : INT_MAX;
     97       return *this;
     98     }
     99     bool empty() const {
    100       return Min > Max;
    101     }
    102     bool contains(int32_t V) const {
    103       return Min <= V && V <= Max && (V-Offset) % Align == 0;
    104     }
    105     bool operator==(const OffsetRange &R) const {
    106       return Min == R.Min && Max == R.Max && Align == R.Align;
    107     }
    108     bool operator!=(const OffsetRange &R) const {
    109       return !operator==(R);
    110     }
    111     bool operator<(const OffsetRange &R) const {
    112       if (Min != R.Min)
    113         return Min < R.Min;
    114       if (Max != R.Max)
    115         return Max < R.Max;
    116       return Align < R.Align;
    117     }
    118     static OffsetRange zero() { return {0, 0, 1}; }
    119   };
    120 
    121   struct RangeTree {
    122     struct Node {
    123       Node(const OffsetRange &R) : MaxEnd(R.Max), Range(R) {}
    124       unsigned Height = 1;
    125       unsigned Count = 1;
    126       int32_t MaxEnd;
    127       const OffsetRange &Range;
    128       Node *Left = nullptr, *Right = nullptr;
    129     };
    130 
    131     Node *Root = nullptr;
    132 
    133     void add(const OffsetRange &R) {
    134       Root = add(Root, R);
    135     }
    136     void erase(const Node *N) {
    137       Root = remove(Root, N);
    138       delete N;
    139     }
    140     void order(SmallVectorImpl<Node*> &Seq) const {
    141       order(Root, Seq);
    142     }
    143     SmallVector<Node*,8> nodesWith(int32_t P, bool CheckAlign = true) {
    144       SmallVector<Node*,8> Nodes;
    145       nodesWith(Root, P, CheckAlign, Nodes);
    146       return Nodes;
    147     }
    148     void dump() const;
    149     ~RangeTree() {
    150       SmallVector<Node*,8> Nodes;
    151       order(Nodes);
    152       for (Node *N : Nodes)
    153         delete N;
    154     }
    155 
    156   private:
    157     void dump(const Node *N) const;
    158     void order(Node *N, SmallVectorImpl<Node*> &Seq) const;
    159     void nodesWith(Node *N, int32_t P, bool CheckA,
    160                    SmallVectorImpl<Node*> &Seq) const;
    161 
    162     Node *add(Node *N, const OffsetRange &R);
    163     Node *remove(Node *N, const Node *D);
    164     Node *rotateLeft(Node *Lower, Node *Higher);
    165     Node *rotateRight(Node *Lower, Node *Higher);
    166     unsigned height(Node *N) {
    167       return N != nullptr ? N->Height : 0;
    168     }
    169     Node *update(Node *N) {
    170       assert(N != nullptr);
    171       N->Height = 1 + std::max(height(N->Left), height(N->Right));
    172       if (N->Left)
    173         N->MaxEnd = std::max(N->MaxEnd, N->Left->MaxEnd);
    174       if (N->Right)
    175         N->MaxEnd = std::max(N->MaxEnd, N->Right->MaxEnd);
    176       return N;
    177     }
    178     Node *rebalance(Node *N) {
    179       assert(N != nullptr);
    180       int32_t Balance = height(N->Right) - height(N->Left);
    181       if (Balance < -1)
    182         return rotateRight(N->Left, N);
    183       if (Balance > 1)
    184         return rotateLeft(N->Right, N);
    185       return N;
    186     }
    187   };
    188 
    189   struct Loc {
    190     MachineBasicBlock *Block = nullptr;
    191     MachineBasicBlock::iterator At;
    192 
    193     Loc(MachineBasicBlock *B, MachineBasicBlock::iterator It)
    194       : Block(B), At(It) {
    195       if (B->end() == It) {
    196         Pos = -1;
    197       } else {
    198         assert(It->getParent() == B);
    199         Pos = std::distance(B->begin(), It);
    200       }
    201     }
    202     bool operator<(Loc A) const {
    203       if (Block != A.Block)
    204         return Block->getNumber() < A.Block->getNumber();
    205       if (A.Pos == -1)
    206         return Pos != A.Pos;
    207       return Pos != -1 && Pos < A.Pos;
    208     }
    209   private:
    210     int Pos = 0;
    211   };
    212 
    213   struct HexagonConstExtenders : public MachineFunctionPass {
    214     static char ID;
    215     HexagonConstExtenders() : MachineFunctionPass(ID) {}
    216 
    217     void getAnalysisUsage(AnalysisUsage &AU) const override {
    218       AU.addRequired<MachineDominatorTree>();
    219       AU.addPreserved<MachineDominatorTree>();
    220       MachineFunctionPass::getAnalysisUsage(AU);
    221     }
    222 
    223     StringRef getPassName() const override {
    224       return "Hexagon constant-extender optimization";
    225     }
    226     bool runOnMachineFunction(MachineFunction &MF) override;
    227 
    228   private:
    229     struct Register {
    230       Register() = default;
    231       Register(unsigned R, unsigned S) : Reg(R), Sub(S) {}
    232       Register(const MachineOperand &Op)
    233         : Reg(Op.getReg()), Sub(Op.getSubReg()) {}
    234       Register &operator=(const MachineOperand &Op) {
    235         if (Op.isReg()) {
    236           Reg = Op.getReg();
    237           Sub = Op.getSubReg();
    238         } else if (Op.isFI()) {
    239           Reg = TargetRegisterInfo::index2StackSlot(Op.getIndex());
    240         }
    241         return *this;
    242       }
    243       bool isVReg() const {
    244         return Reg != 0 && !TargetRegisterInfo::isStackSlot(Reg) &&
    245                TargetRegisterInfo::isVirtualRegister(Reg);
    246       }
    247       bool isSlot() const {
    248         return Reg != 0 && TargetRegisterInfo::isStackSlot(Reg);
    249       }
    250       operator MachineOperand() const {
    251         if (isVReg())
    252           return MachineOperand::CreateReg(Reg, /*Def*/false, /*Imp*/false,
    253                           /*Kill*/false, /*Dead*/false, /*Undef*/false,
    254                           /*EarlyClobber*/false, Sub);
    255         if (TargetRegisterInfo::isStackSlot(Reg)) {
    256           int FI = TargetRegisterInfo::stackSlot2Index(Reg);
    257           return MachineOperand::CreateFI(FI);
    258         }
    259         llvm_unreachable("Cannot create MachineOperand");
    260       }
    261       bool operator==(Register R) const { return Reg == R.Reg && Sub == R.Sub; }
    262       bool operator!=(Register R) const { return !operator==(R); }
    263       bool operator<(Register R) const {
    264         // For std::map.
    265         return Reg < R.Reg || (Reg == R.Reg && Sub < R.Sub);
    266       }
    267       unsigned Reg = 0, Sub = 0;
    268     };
    269 
    270     struct ExtExpr {
    271       // A subexpression in which the extender is used. In general, this
    272       // represents an expression where adding D to the extender will be
    273       // equivalent to adding D to the expression as a whole. In other
    274       // words, expr(add(##V,D) = add(expr(##V),D).
    275 
    276       // The original motivation for this are the io/ur addressing modes,
    277       // where the offset is extended. Consider the io example:
    278       // In memw(Rs+##V), the ##V could be replaced by a register Rt to
    279       // form the rr mode: memw(Rt+Rs<<0). In such case, however, the
    280       // register Rt must have exactly the value of ##V. If there was
    281       // another instruction memw(Rs+##V+4), it would need a different Rt.
    282       // Now, if Rt was initialized as "##V+Rs<<0", both of these
    283       // instructions could use the same Rt, just with different offsets.
    284       // Here it's clear that "initializer+4" should be the same as if
    285       // the offset 4 was added to the ##V in the initializer.
    286 
    287       // The only kinds of expressions that support the requirement of
    288       // commuting with addition are addition and subtraction from ##V.
    289       // Include shifting the Rs to account for the ur addressing mode:
    290       //   ##Val + Rs << S
    291       //   ##Val - Rs
    292       Register Rs;
    293       unsigned S = 0;
    294       bool Neg = false;
    295 
    296       ExtExpr() = default;
    297       ExtExpr(Register RS, bool NG, unsigned SH) : Rs(RS), S(SH), Neg(NG) {}
    298       // Expression is trivial if it does not modify the extender.
    299       bool trivial() const {
    300         return Rs.Reg == 0;
    301       }
    302       bool operator==(const ExtExpr &Ex) const {
    303         return Rs == Ex.Rs && S == Ex.S && Neg == Ex.Neg;
    304       }
    305       bool operator!=(const ExtExpr &Ex) const {
    306         return !operator==(Ex);
    307       }
    308       bool operator<(const ExtExpr &Ex) const {
    309         if (Rs != Ex.Rs)
    310           return Rs < Ex.Rs;
    311         if (S != Ex.S)
    312           return S < Ex.S;
    313         return !Neg && Ex.Neg;
    314       }
    315     };
    316 
    317     struct ExtDesc {
    318       MachineInstr *UseMI = nullptr;
    319       unsigned OpNum = -1u;
    320       // The subexpression in which the extender is used (e.g. address
    321       // computation).
    322       ExtExpr Expr;
    323       // Optional register that is assigned the value of Expr.
    324       Register Rd;
    325       // Def means that the output of the instruction may differ from the
    326       // original by a constant c, and that the difference can be corrected
    327       // by adding/subtracting c in all users of the defined register.
    328       bool IsDef = false;
    329 
    330       MachineOperand &getOp() {
    331         return UseMI->getOperand(OpNum);
    332       }
    333       const MachineOperand &getOp() const {
    334         return UseMI->getOperand(OpNum);
    335       }
    336     };
    337 
    338     struct ExtRoot {
    339       union {
    340         const ConstantFP *CFP;  // MO_FPImmediate
    341         const char *SymbolName; // MO_ExternalSymbol
    342         const GlobalValue *GV;  // MO_GlobalAddress
    343         const BlockAddress *BA; // MO_BlockAddress
    344         int64_t ImmVal;         // MO_Immediate, MO_TargetIndex,
    345                                 // and MO_ConstantPoolIndex
    346       } V;
    347       unsigned Kind;            // Same as in MachineOperand.
    348       unsigned char TF;         // TargetFlags.
    349 
    350       ExtRoot(const MachineOperand &Op);
    351       bool operator==(const ExtRoot &ER) const {
    352         return Kind == ER.Kind && V.ImmVal == ER.V.ImmVal;
    353       }
    354       bool operator!=(const ExtRoot &ER) const {
    355         return !operator==(ER);
    356       }
    357       bool operator<(const ExtRoot &ER) const;
    358     };
    359 
    360     struct ExtValue : public ExtRoot {
    361       int32_t Offset;
    362 
    363       ExtValue(const MachineOperand &Op);
    364       ExtValue(const ExtDesc &ED) : ExtValue(ED.getOp()) {}
    365       ExtValue(const ExtRoot &ER, int32_t Off) : ExtRoot(ER), Offset(Off) {}
    366       bool operator<(const ExtValue &EV) const;
    367       bool operator==(const ExtValue &EV) const {
    368         return ExtRoot(*this) == ExtRoot(EV) && Offset == EV.Offset;
    369       }
    370       bool operator!=(const ExtValue &EV) const {
    371         return !operator==(EV);
    372       }
    373       explicit operator MachineOperand() const;
    374     };
    375 
    376     using IndexList = SetVector<unsigned>;
    377     using ExtenderInit = std::pair<ExtValue, ExtExpr>;
    378     using AssignmentMap = std::map<ExtenderInit, IndexList>;
    379     using LocDefMap = std::map<Loc, IndexList>;
    380 
    381     const HexagonInstrInfo *HII = nullptr;
    382     const HexagonRegisterInfo *HRI = nullptr;
    383     MachineDominatorTree *MDT = nullptr;
    384     MachineRegisterInfo *MRI = nullptr;
    385     std::vector<ExtDesc> Extenders;
    386     std::vector<unsigned> NewRegs;
    387 
    388     bool isStoreImmediate(unsigned Opc) const;
    389     bool isRegOffOpcode(unsigned ExtOpc) const ;
    390     unsigned getRegOffOpcode(unsigned ExtOpc) const;
    391     unsigned getDirectRegReplacement(unsigned ExtOpc) const;
    392     OffsetRange getOffsetRange(Register R, const MachineInstr &MI) const;
    393     OffsetRange getOffsetRange(const ExtDesc &ED) const;
    394     OffsetRange getOffsetRange(Register Rd) const;
    395 
    396     void recordExtender(MachineInstr &MI, unsigned OpNum);
    397     void collectInstr(MachineInstr &MI);
    398     void collect(MachineFunction &MF);
    399     void assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
    400                      AssignmentMap &IMap);
    401     void calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
    402                             LocDefMap &Defs);
    403     Register insertInitializer(Loc DefL, const ExtenderInit &ExtI);
    404     bool replaceInstrExact(const ExtDesc &ED, Register ExtR);
    405     bool replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
    406                           Register ExtR, int32_t &Diff);
    407     bool replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI);
    408     bool replaceExtenders(const AssignmentMap &IMap);
    409 
    410     unsigned getOperandIndex(const MachineInstr &MI,
    411                              const MachineOperand &Op) const;
    412     const MachineOperand &getPredicateOp(const MachineInstr &MI) const;
    413     const MachineOperand &getLoadResultOp(const MachineInstr &MI) const;
    414     const MachineOperand &getStoredValueOp(const MachineInstr &MI) const;
    415 
    416     friend struct PrintRegister;
    417     friend struct PrintExpr;
    418     friend struct PrintInit;
    419     friend struct PrintIMap;
    420     friend raw_ostream &operator<< (raw_ostream &OS,
    421                                     const struct PrintRegister &P);
    422     friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintExpr &P);
    423     friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintInit &P);
    424     friend raw_ostream &operator<< (raw_ostream &OS, const ExtDesc &ED);
    425     friend raw_ostream &operator<< (raw_ostream &OS, const ExtRoot &ER);
    426     friend raw_ostream &operator<< (raw_ostream &OS, const ExtValue &EV);
    427     friend raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR);
    428     friend raw_ostream &operator<< (raw_ostream &OS, const struct PrintIMap &P);
    429   };
    430 
    431   using HCE = HexagonConstExtenders;
    432 
    433   LLVM_ATTRIBUTE_UNUSED
    434   raw_ostream &operator<< (raw_ostream &OS, const OffsetRange &OR) {
    435     if (OR.Min > OR.Max)
    436       OS << '!';
    437     OS << '[' << OR.Min << ',' << OR.Max << "]a" << unsigned(OR.Align)
    438        << '+' << unsigned(OR.Offset);
    439     return OS;
    440   }
    441 
    442   struct PrintRegister {
    443     PrintRegister(HCE::Register R, const HexagonRegisterInfo &I)
    444       : Rs(R), HRI(I) {}
    445     HCE::Register Rs;
    446     const HexagonRegisterInfo &HRI;
    447   };
    448 
    449   LLVM_ATTRIBUTE_UNUSED
    450   raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &P) {
    451     if (P.Rs.Reg != 0)
    452       OS << printReg(P.Rs.Reg, &P.HRI, P.Rs.Sub);
    453     else
    454       OS << "noreg";
    455     return OS;
    456   }
    457 
    458   struct PrintExpr {
    459     PrintExpr(const HCE::ExtExpr &E, const HexagonRegisterInfo &I)
    460       : Ex(E), HRI(I) {}
    461     const HCE::ExtExpr &Ex;
    462     const HexagonRegisterInfo &HRI;
    463   };
    464 
    465   LLVM_ATTRIBUTE_UNUSED
    466   raw_ostream &operator<< (raw_ostream &OS, const PrintExpr &P) {
    467     OS << "## " << (P.Ex.Neg ? "- " : "+ ");
    468     if (P.Ex.Rs.Reg != 0)
    469       OS << printReg(P.Ex.Rs.Reg, &P.HRI, P.Ex.Rs.Sub);
    470     else
    471       OS << "__";
    472     OS << " << " << P.Ex.S;
    473     return OS;
    474   }
    475 
    476   struct PrintInit {
    477     PrintInit(const HCE::ExtenderInit &EI, const HexagonRegisterInfo &I)
    478       : ExtI(EI), HRI(I) {}
    479     const HCE::ExtenderInit &ExtI;
    480     const HexagonRegisterInfo &HRI;
    481   };
    482 
    483   LLVM_ATTRIBUTE_UNUSED
    484   raw_ostream &operator<< (raw_ostream &OS, const PrintInit &P) {
    485     OS << '[' << P.ExtI.first << ", "
    486        << PrintExpr(P.ExtI.second, P.HRI) << ']';
    487     return OS;
    488   }
    489 
    490   LLVM_ATTRIBUTE_UNUSED
    491   raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtDesc &ED) {
    492     assert(ED.OpNum != -1u);
    493     const MachineBasicBlock &MBB = *ED.getOp().getParent()->getParent();
    494     const MachineFunction &MF = *MBB.getParent();
    495     const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
    496     OS << "bb#" << MBB.getNumber() << ": ";
    497     if (ED.Rd.Reg != 0)
    498       OS << printReg(ED.Rd.Reg, &HRI, ED.Rd.Sub);
    499     else
    500       OS << "__";
    501     OS << " = " << PrintExpr(ED.Expr, HRI);
    502     if (ED.IsDef)
    503       OS << ", def";
    504     return OS;
    505   }
    506 
    507   LLVM_ATTRIBUTE_UNUSED
    508   raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtRoot &ER) {
    509     switch (ER.Kind) {
    510       case MachineOperand::MO_Immediate:
    511         OS << "imm:" << ER.V.ImmVal;
    512         break;
    513       case MachineOperand::MO_FPImmediate:
    514         OS << "fpi:" << *ER.V.CFP;
    515         break;
    516       case MachineOperand::MO_ExternalSymbol:
    517         OS << "sym:" << *ER.V.SymbolName;
    518         break;
    519       case MachineOperand::MO_GlobalAddress:
    520         OS << "gad:" << ER.V.GV->getName();
    521         break;
    522       case MachineOperand::MO_BlockAddress:
    523         OS << "blk:" << *ER.V.BA;
    524         break;
    525       case MachineOperand::MO_TargetIndex:
    526         OS << "tgi:" << ER.V.ImmVal;
    527         break;
    528       case MachineOperand::MO_ConstantPoolIndex:
    529         OS << "cpi:" << ER.V.ImmVal;
    530         break;
    531       case MachineOperand::MO_JumpTableIndex:
    532         OS << "jti:" << ER.V.ImmVal;
    533         break;
    534       default:
    535         OS << "???:" << ER.V.ImmVal;
    536         break;
    537     }
    538     return OS;
    539   }
    540 
    541   LLVM_ATTRIBUTE_UNUSED
    542   raw_ostream &operator<< (raw_ostream &OS, const HCE::ExtValue &EV) {
    543     OS << HCE::ExtRoot(EV) << "  off:" << EV.Offset;
    544     return OS;
    545   }
    546 
    547   struct PrintIMap {
    548     PrintIMap(const HCE::AssignmentMap &M, const HexagonRegisterInfo &I)
    549       : IMap(M), HRI(I) {}
    550     const HCE::AssignmentMap &IMap;
    551     const HexagonRegisterInfo &HRI;
    552   };
    553 
    554   LLVM_ATTRIBUTE_UNUSED
    555   raw_ostream &operator<< (raw_ostream &OS, const PrintIMap &P) {
    556     OS << "{\n";
    557     for (const std::pair<HCE::ExtenderInit,HCE::IndexList> &Q : P.IMap) {
    558       OS << "  " << PrintInit(Q.first, P.HRI) << " -> {";
    559       for (unsigned I : Q.second)
    560         OS << ' ' << I;
    561       OS << " }\n";
    562     }
    563     OS << "}\n";
    564     return OS;
    565   }
    566 }
    567 
    568 INITIALIZE_PASS_BEGIN(HexagonConstExtenders, "hexagon-cext-opt",
    569       "Hexagon constant-extender optimization", false, false)
    570 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    571 INITIALIZE_PASS_END(HexagonConstExtenders, "hexagon-cext-opt",
    572       "Hexagon constant-extender optimization", false, false)
    573 
    574 static unsigned ReplaceCounter = 0;
    575 
    576 char HCE::ID = 0;
    577 
    578 #ifndef NDEBUG
    579 LLVM_DUMP_METHOD void RangeTree::dump() const {
    580   dbgs() << "Root: " << Root << '\n';
    581   if (Root)
    582     dump(Root);
    583 }
    584 
    585 LLVM_DUMP_METHOD void RangeTree::dump(const Node *N) const {
    586   dbgs() << "Node: " << N << '\n';
    587   dbgs() << "  Height: " << N->Height << '\n';
    588   dbgs() << "  Count: " << N->Count << '\n';
    589   dbgs() << "  MaxEnd: " << N->MaxEnd << '\n';
    590   dbgs() << "  Range: " << N->Range << '\n';
    591   dbgs() << "  Left: " << N->Left << '\n';
    592   dbgs() << "  Right: " << N->Right << "\n\n";
    593 
    594   if (N->Left)
    595     dump(N->Left);
    596   if (N->Right)
    597     dump(N->Right);
    598 }
    599 #endif
    600 
    601 void RangeTree::order(Node *N, SmallVectorImpl<Node*> &Seq) const {
    602   if (N == nullptr)
    603     return;
    604   order(N->Left, Seq);
    605   Seq.push_back(N);
    606   order(N->Right, Seq);
    607 }
    608 
    609 void RangeTree::nodesWith(Node *N, int32_t P, bool CheckA,
    610       SmallVectorImpl<Node*> &Seq) const {
    611   if (N == nullptr || N->MaxEnd < P)
    612     return;
    613   nodesWith(N->Left, P, CheckA, Seq);
    614   if (N->Range.Min <= P) {
    615     if ((CheckA && N->Range.contains(P)) || (!CheckA && P <= N->Range.Max))
    616       Seq.push_back(N);
    617     nodesWith(N->Right, P, CheckA, Seq);
    618   }
    619 }
    620 
    621 RangeTree::Node *RangeTree::add(Node *N, const OffsetRange &R) {
    622   if (N == nullptr)
    623     return new Node(R);
    624 
    625   if (N->Range == R) {
    626     N->Count++;
    627     return N;
    628   }
    629 
    630   if (R < N->Range)
    631     N->Left = add(N->Left, R);
    632   else
    633     N->Right = add(N->Right, R);
    634   return rebalance(update(N));
    635 }
    636 
    637 RangeTree::Node *RangeTree::remove(Node *N, const Node *D) {
    638   assert(N != nullptr);
    639 
    640   if (N != D) {
    641     assert(N->Range != D->Range && "N and D should not be equal");
    642     if (D->Range < N->Range)
    643       N->Left = remove(N->Left, D);
    644     else
    645       N->Right = remove(N->Right, D);
    646     return rebalance(update(N));
    647   }
    648 
    649   // We got to the node we need to remove. If any of its children are
    650   // missing, simply replace it with the other child.
    651   if (N->Left == nullptr || N->Right == nullptr)
    652     return (N->Left == nullptr) ? N->Right : N->Left;
    653 
    654   // Find the rightmost child of N->Left, remove it and plug it in place
    655   // of N.
    656   Node *M = N->Left;
    657   while (M->Right)
    658     M = M->Right;
    659   M->Left = remove(N->Left, M);
    660   M->Right = N->Right;
    661   return rebalance(update(M));
    662 }
    663 
    664 RangeTree::Node *RangeTree::rotateLeft(Node *Lower, Node *Higher) {
    665   assert(Higher->Right == Lower);
    666   // The Lower node is on the right from Higher. Make sure that Lower's
    667   // balance is greater to the right. Otherwise the rotation will create
    668   // an unbalanced tree again.
    669   if (height(Lower->Left) > height(Lower->Right))
    670     Lower = rotateRight(Lower->Left, Lower);
    671   assert(height(Lower->Left) <= height(Lower->Right));
    672   Higher->Right = Lower->Left;
    673   update(Higher);
    674   Lower->Left = Higher;
    675   update(Lower);
    676   return Lower;
    677 }
    678 
    679 RangeTree::Node *RangeTree::rotateRight(Node *Lower, Node *Higher) {
    680   assert(Higher->Left == Lower);
    681   // The Lower node is on the left from Higher. Make sure that Lower's
    682   // balance is greater to the left. Otherwise the rotation will create
    683   // an unbalanced tree again.
    684   if (height(Lower->Left) < height(Lower->Right))
    685     Lower = rotateLeft(Lower->Right, Lower);
    686   assert(height(Lower->Left) >= height(Lower->Right));
    687   Higher->Left = Lower->Right;
    688   update(Higher);
    689   Lower->Right = Higher;
    690   update(Lower);
    691   return Lower;
    692 }
    693 
    694 
    695 HCE::ExtRoot::ExtRoot(const MachineOperand &Op) {
    696   // Always store ImmVal, since it's the field used for comparisons.
    697   V.ImmVal = 0;
    698   if (Op.isImm())
    699     ; // Keep 0. Do not use Op.getImm() for value here (treat 0 as the root).
    700   else if (Op.isFPImm())
    701     V.CFP = Op.getFPImm();
    702   else if (Op.isSymbol())
    703     V.SymbolName = Op.getSymbolName();
    704   else if (Op.isGlobal())
    705     V.GV = Op.getGlobal();
    706   else if (Op.isBlockAddress())
    707     V.BA = Op.getBlockAddress();
    708   else if (Op.isCPI() || Op.isTargetIndex() || Op.isJTI())
    709     V.ImmVal = Op.getIndex();
    710   else
    711     llvm_unreachable("Unexpected operand type");
    712 
    713   Kind = Op.getType();
    714   TF = Op.getTargetFlags();
    715 }
    716 
    717 bool HCE::ExtRoot::operator< (const HCE::ExtRoot &ER) const {
    718   if (Kind != ER.Kind)
    719     return Kind < ER.Kind;
    720   switch (Kind) {
    721     case MachineOperand::MO_Immediate:
    722     case MachineOperand::MO_TargetIndex:
    723     case MachineOperand::MO_ConstantPoolIndex:
    724     case MachineOperand::MO_JumpTableIndex:
    725       return V.ImmVal < ER.V.ImmVal;
    726     case MachineOperand::MO_FPImmediate: {
    727       const APFloat &ThisF = V.CFP->getValueAPF();
    728       const APFloat &OtherF = ER.V.CFP->getValueAPF();
    729       return ThisF.bitcastToAPInt().ult(OtherF.bitcastToAPInt());
    730     }
    731     case MachineOperand::MO_ExternalSymbol:
    732       return StringRef(V.SymbolName) < StringRef(ER.V.SymbolName);
    733     case MachineOperand::MO_GlobalAddress: {
    734       // Global values may not have names, so compare their positions
    735       // in the parent module.
    736       const Module &M = *V.GV->getParent();
    737       auto FindPos = [&M] (const GlobalValue &V) {
    738         unsigned P = 0;
    739         for (const GlobalValue &T : M.global_values()) {
    740           if (&T == &V)
    741             return P;
    742           P++;
    743         }
    744         llvm_unreachable("Global value not found in module");
    745       };
    746       return FindPos(*V.GV) < FindPos(*ER.V.GV);
    747     }
    748     case MachineOperand::MO_BlockAddress: {
    749       const BasicBlock *ThisB = V.BA->getBasicBlock();
    750       const BasicBlock *OtherB = ER.V.BA->getBasicBlock();
    751       assert(ThisB->getParent() == OtherB->getParent());
    752       const Function &F = *ThisB->getParent();
    753       return std::distance(F.begin(), ThisB->getIterator()) <
    754              std::distance(F.begin(), OtherB->getIterator());
    755     }
    756   }
    757   return V.ImmVal < ER.V.ImmVal;
    758 }
    759 
    760 HCE::ExtValue::ExtValue(const MachineOperand &Op) : ExtRoot(Op) {
    761   if (Op.isImm())
    762     Offset = Op.getImm();
    763   else if (Op.isFPImm() || Op.isJTI())
    764     Offset = 0;
    765   else if (Op.isSymbol() || Op.isGlobal() || Op.isBlockAddress() ||
    766            Op.isCPI() || Op.isTargetIndex())
    767     Offset = Op.getOffset();
    768   else
    769     llvm_unreachable("Unexpected operand type");
    770 }
    771 
    772 bool HCE::ExtValue::operator< (const HCE::ExtValue &EV) const {
    773   const ExtRoot &ER = *this;
    774   if (!(ER == ExtRoot(EV)))
    775     return ER < EV;
    776   return Offset < EV.Offset;
    777 }
    778 
    779 HCE::ExtValue::operator MachineOperand() const {
    780   switch (Kind) {
    781     case MachineOperand::MO_Immediate:
    782       return MachineOperand::CreateImm(V.ImmVal + Offset);
    783     case MachineOperand::MO_FPImmediate:
    784       assert(Offset == 0);
    785       return MachineOperand::CreateFPImm(V.CFP);
    786     case MachineOperand::MO_ExternalSymbol:
    787       assert(Offset == 0);
    788       return MachineOperand::CreateES(V.SymbolName, TF);
    789     case MachineOperand::MO_GlobalAddress:
    790       return MachineOperand::CreateGA(V.GV, Offset, TF);
    791     case MachineOperand::MO_BlockAddress:
    792       return MachineOperand::CreateBA(V.BA, Offset, TF);
    793     case MachineOperand::MO_TargetIndex:
    794       return MachineOperand::CreateTargetIndex(V.ImmVal, Offset, TF);
    795     case MachineOperand::MO_ConstantPoolIndex:
    796       return MachineOperand::CreateCPI(V.ImmVal, Offset, TF);
    797     case MachineOperand::MO_JumpTableIndex:
    798       assert(Offset == 0);
    799     default:
    800       llvm_unreachable("Unhandled kind");
    801  }
    802 }
    803 
    804 bool HCE::isStoreImmediate(unsigned Opc) const {
    805   switch (Opc) {
    806     case Hexagon::S4_storeirbt_io:
    807     case Hexagon::S4_storeirbf_io:
    808     case Hexagon::S4_storeirht_io:
    809     case Hexagon::S4_storeirhf_io:
    810     case Hexagon::S4_storeirit_io:
    811     case Hexagon::S4_storeirif_io:
    812     case Hexagon::S4_storeirb_io:
    813     case Hexagon::S4_storeirh_io:
    814     case Hexagon::S4_storeiri_io:
    815       return true;
    816     default:
    817       break;
    818   }
    819   return false;
    820 }
    821 
    822 bool HCE::isRegOffOpcode(unsigned Opc) const {
    823   switch (Opc) {
    824     case Hexagon::L2_loadrub_io:
    825     case Hexagon::L2_loadrb_io:
    826     case Hexagon::L2_loadruh_io:
    827     case Hexagon::L2_loadrh_io:
    828     case Hexagon::L2_loadri_io:
    829     case Hexagon::L2_loadrd_io:
    830     case Hexagon::L2_loadbzw2_io:
    831     case Hexagon::L2_loadbzw4_io:
    832     case Hexagon::L2_loadbsw2_io:
    833     case Hexagon::L2_loadbsw4_io:
    834     case Hexagon::L2_loadalignh_io:
    835     case Hexagon::L2_loadalignb_io:
    836     case Hexagon::L2_ploadrubt_io:
    837     case Hexagon::L2_ploadrubf_io:
    838     case Hexagon::L2_ploadrbt_io:
    839     case Hexagon::L2_ploadrbf_io:
    840     case Hexagon::L2_ploadruht_io:
    841     case Hexagon::L2_ploadruhf_io:
    842     case Hexagon::L2_ploadrht_io:
    843     case Hexagon::L2_ploadrhf_io:
    844     case Hexagon::L2_ploadrit_io:
    845     case Hexagon::L2_ploadrif_io:
    846     case Hexagon::L2_ploadrdt_io:
    847     case Hexagon::L2_ploadrdf_io:
    848     case Hexagon::S2_storerb_io:
    849     case Hexagon::S2_storerh_io:
    850     case Hexagon::S2_storerf_io:
    851     case Hexagon::S2_storeri_io:
    852     case Hexagon::S2_storerd_io:
    853     case Hexagon::S2_pstorerbt_io:
    854     case Hexagon::S2_pstorerbf_io:
    855     case Hexagon::S2_pstorerht_io:
    856     case Hexagon::S2_pstorerhf_io:
    857     case Hexagon::S2_pstorerft_io:
    858     case Hexagon::S2_pstorerff_io:
    859     case Hexagon::S2_pstorerit_io:
    860     case Hexagon::S2_pstorerif_io:
    861     case Hexagon::S2_pstorerdt_io:
    862     case Hexagon::S2_pstorerdf_io:
    863     case Hexagon::A2_addi:
    864       return true;
    865     default:
    866       break;
    867   }
    868   return false;
    869 }
    870 
    871 unsigned HCE::getRegOffOpcode(unsigned ExtOpc) const {
    872   // If there exists an instruction that takes a register and offset,
    873   // that corresponds to the ExtOpc, return it, otherwise return 0.
    874   using namespace Hexagon;
    875   switch (ExtOpc) {
    876     case A2_tfrsi:    return A2_addi;
    877     default:
    878       break;
    879   }
    880   const MCInstrDesc &D = HII->get(ExtOpc);
    881   if (D.mayLoad() || D.mayStore()) {
    882     uint64_t F = D.TSFlags;
    883     unsigned AM = (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
    884     switch (AM) {
    885       case HexagonII::Absolute:
    886       case HexagonII::AbsoluteSet:
    887       case HexagonII::BaseLongOffset:
    888         switch (ExtOpc) {
    889           case PS_loadrubabs:
    890           case L4_loadrub_ap:
    891           case L4_loadrub_ur:     return L2_loadrub_io;
    892           case PS_loadrbabs:
    893           case L4_loadrb_ap:
    894           case L4_loadrb_ur:      return L2_loadrb_io;
    895           case PS_loadruhabs:
    896           case L4_loadruh_ap:
    897           case L4_loadruh_ur:     return L2_loadruh_io;
    898           case PS_loadrhabs:
    899           case L4_loadrh_ap:
    900           case L4_loadrh_ur:      return L2_loadrh_io;
    901           case PS_loadriabs:
    902           case L4_loadri_ap:
    903           case L4_loadri_ur:      return L2_loadri_io;
    904           case PS_loadrdabs:
    905           case L4_loadrd_ap:
    906           case L4_loadrd_ur:      return L2_loadrd_io;
    907           case L4_loadbzw2_ap:
    908           case L4_loadbzw2_ur:    return L2_loadbzw2_io;
    909           case L4_loadbzw4_ap:
    910           case L4_loadbzw4_ur:    return L2_loadbzw4_io;
    911           case L4_loadbsw2_ap:
    912           case L4_loadbsw2_ur:    return L2_loadbsw2_io;
    913           case L4_loadbsw4_ap:
    914           case L4_loadbsw4_ur:    return L2_loadbsw4_io;
    915           case L4_loadalignh_ap:
    916           case L4_loadalignh_ur:  return L2_loadalignh_io;
    917           case L4_loadalignb_ap:
    918           case L4_loadalignb_ur:  return L2_loadalignb_io;
    919           case L4_ploadrubt_abs:  return L2_ploadrubt_io;
    920           case L4_ploadrubf_abs:  return L2_ploadrubf_io;
    921           case L4_ploadrbt_abs:   return L2_ploadrbt_io;
    922           case L4_ploadrbf_abs:   return L2_ploadrbf_io;
    923           case L4_ploadruht_abs:  return L2_ploadruht_io;
    924           case L4_ploadruhf_abs:  return L2_ploadruhf_io;
    925           case L4_ploadrht_abs:   return L2_ploadrht_io;
    926           case L4_ploadrhf_abs:   return L2_ploadrhf_io;
    927           case L4_ploadrit_abs:   return L2_ploadrit_io;
    928           case L4_ploadrif_abs:   return L2_ploadrif_io;
    929           case L4_ploadrdt_abs:   return L2_ploadrdt_io;
    930           case L4_ploadrdf_abs:   return L2_ploadrdf_io;
    931           case PS_storerbabs:
    932           case S4_storerb_ap:
    933           case S4_storerb_ur:     return S2_storerb_io;
    934           case PS_storerhabs:
    935           case S4_storerh_ap:
    936           case S4_storerh_ur:     return S2_storerh_io;
    937           case PS_storerfabs:
    938           case S4_storerf_ap:
    939           case S4_storerf_ur:     return S2_storerf_io;
    940           case PS_storeriabs:
    941           case S4_storeri_ap:
    942           case S4_storeri_ur:     return S2_storeri_io;
    943           case PS_storerdabs:
    944           case S4_storerd_ap:
    945           case S4_storerd_ur:     return S2_storerd_io;
    946           case S4_pstorerbt_abs:  return S2_pstorerbt_io;
    947           case S4_pstorerbf_abs:  return S2_pstorerbf_io;
    948           case S4_pstorerht_abs:  return S2_pstorerht_io;
    949           case S4_pstorerhf_abs:  return S2_pstorerhf_io;
    950           case S4_pstorerft_abs:  return S2_pstorerft_io;
    951           case S4_pstorerff_abs:  return S2_pstorerff_io;
    952           case S4_pstorerit_abs:  return S2_pstorerit_io;
    953           case S4_pstorerif_abs:  return S2_pstorerif_io;
    954           case S4_pstorerdt_abs:  return S2_pstorerdt_io;
    955           case S4_pstorerdf_abs:  return S2_pstorerdf_io;
    956           default:
    957             break;
    958         }
    959         break;
    960       case HexagonII::BaseImmOffset:
    961         if (!isStoreImmediate(ExtOpc))
    962           return ExtOpc;
    963         break;
    964       default:
    965         break;
    966     }
    967   }
    968   return 0;
    969 }
    970 
    971 unsigned HCE::getDirectRegReplacement(unsigned ExtOpc) const {
    972   switch (ExtOpc) {
    973     case Hexagon::A2_addi:          return Hexagon::A2_add;
    974     case Hexagon::A2_andir:         return Hexagon::A2_and;
    975     case Hexagon::A2_combineii:     return Hexagon::A4_combineri;
    976     case Hexagon::A2_orir:          return Hexagon::A2_or;
    977     case Hexagon::A2_paddif:        return Hexagon::A2_paddf;
    978     case Hexagon::A2_paddit:        return Hexagon::A2_paddt;
    979     case Hexagon::A2_subri:         return Hexagon::A2_sub;
    980     case Hexagon::A2_tfrsi:         return TargetOpcode::COPY;
    981     case Hexagon::A4_cmpbeqi:       return Hexagon::A4_cmpbeq;
    982     case Hexagon::A4_cmpbgti:       return Hexagon::A4_cmpbgt;
    983     case Hexagon::A4_cmpbgtui:      return Hexagon::A4_cmpbgtu;
    984     case Hexagon::A4_cmpheqi:       return Hexagon::A4_cmpheq;
    985     case Hexagon::A4_cmphgti:       return Hexagon::A4_cmphgt;
    986     case Hexagon::A4_cmphgtui:      return Hexagon::A4_cmphgtu;
    987     case Hexagon::A4_combineii:     return Hexagon::A4_combineir;
    988     case Hexagon::A4_combineir:     return TargetOpcode::REG_SEQUENCE;
    989     case Hexagon::A4_combineri:     return TargetOpcode::REG_SEQUENCE;
    990     case Hexagon::A4_rcmpeqi:       return Hexagon::A4_rcmpeq;
    991     case Hexagon::A4_rcmpneqi:      return Hexagon::A4_rcmpneq;
    992     case Hexagon::C2_cmoveif:       return Hexagon::A2_tfrpf;
    993     case Hexagon::C2_cmoveit:       return Hexagon::A2_tfrpt;
    994     case Hexagon::C2_cmpeqi:        return Hexagon::C2_cmpeq;
    995     case Hexagon::C2_cmpgti:        return Hexagon::C2_cmpgt;
    996     case Hexagon::C2_cmpgtui:       return Hexagon::C2_cmpgtu;
    997     case Hexagon::C2_muxii:         return Hexagon::C2_muxir;
    998     case Hexagon::C2_muxir:         return Hexagon::C2_mux;
    999     case Hexagon::C2_muxri:         return Hexagon::C2_mux;
   1000     case Hexagon::C4_cmpltei:       return Hexagon::C4_cmplte;
   1001     case Hexagon::C4_cmplteui:      return Hexagon::C4_cmplteu;
   1002     case Hexagon::C4_cmpneqi:       return Hexagon::C4_cmpneq;
   1003     case Hexagon::M2_accii:         return Hexagon::M2_acci;        // T -> T
   1004     /* No M2_macsin */
   1005     case Hexagon::M2_macsip:        return Hexagon::M2_maci;        // T -> T
   1006     case Hexagon::M2_mpysin:        return Hexagon::M2_mpyi;
   1007     case Hexagon::M2_mpysip:        return Hexagon::M2_mpyi;
   1008     case Hexagon::M2_mpysmi:        return Hexagon::M2_mpyi;
   1009     case Hexagon::M2_naccii:        return Hexagon::M2_nacci;       // T -> T
   1010     case Hexagon::M4_mpyri_addi:    return Hexagon::M4_mpyri_addr;
   1011     case Hexagon::M4_mpyri_addr:    return Hexagon::M4_mpyrr_addr;  // _ -> T
   1012     case Hexagon::M4_mpyrr_addi:    return Hexagon::M4_mpyrr_addr;  // _ -> T
   1013     case Hexagon::S4_addaddi:       return Hexagon::M2_acci;        // _ -> T
   1014     case Hexagon::S4_addi_asl_ri:   return Hexagon::S2_asl_i_r_acc; // T -> T
   1015     case Hexagon::S4_addi_lsr_ri:   return Hexagon::S2_lsr_i_r_acc; // T -> T
   1016     case Hexagon::S4_andi_asl_ri:   return Hexagon::S2_asl_i_r_and; // T -> T
   1017     case Hexagon::S4_andi_lsr_ri:   return Hexagon::S2_lsr_i_r_and; // T -> T
   1018     case Hexagon::S4_ori_asl_ri:    return Hexagon::S2_asl_i_r_or;  // T -> T
   1019     case Hexagon::S4_ori_lsr_ri:    return Hexagon::S2_lsr_i_r_or;  // T -> T
   1020     case Hexagon::S4_subaddi:       return Hexagon::M2_subacc;      // _ -> T
   1021     case Hexagon::S4_subi_asl_ri:   return Hexagon::S2_asl_i_r_nac; // T -> T
   1022     case Hexagon::S4_subi_lsr_ri:   return Hexagon::S2_lsr_i_r_nac; // T -> T
   1023 
   1024     // Store-immediates:
   1025     case Hexagon::S4_storeirbf_io:  return Hexagon::S2_pstorerbf_io;
   1026     case Hexagon::S4_storeirb_io:   return Hexagon::S2_storerb_io;
   1027     case Hexagon::S4_storeirbt_io:  return Hexagon::S2_pstorerbt_io;
   1028     case Hexagon::S4_storeirhf_io:  return Hexagon::S2_pstorerhf_io;
   1029     case Hexagon::S4_storeirh_io:   return Hexagon::S2_storerh_io;
   1030     case Hexagon::S4_storeirht_io:  return Hexagon::S2_pstorerht_io;
   1031     case Hexagon::S4_storeirif_io:  return Hexagon::S2_pstorerif_io;
   1032     case Hexagon::S4_storeiri_io:   return Hexagon::S2_storeri_io;
   1033     case Hexagon::S4_storeirit_io:  return Hexagon::S2_pstorerit_io;
   1034 
   1035     default:
   1036       break;
   1037   }
   1038   return 0;
   1039 }
   1040 
   1041 // Return the allowable deviation from the current value of Rb (i.e. the
   1042 // range of values that can be added to the current value) which the
   1043 // instruction MI can accommodate.
   1044 // The instruction MI is a user of register Rb, which is defined via an
   1045 // extender. It may be possible for MI to be tweaked to work for a register
   1046 // defined with a slightly different value. For example
   1047 //   ... = L2_loadrub_io Rb, 1
   1048 // can be modifed to be
   1049 //   ... = L2_loadrub_io Rb', 0
   1050 // if Rb' = Rb+1.
   1051 // The range for Rb would be [Min+1, Max+1], where [Min, Max] is a range
   1052 // for L2_loadrub with offset 0. That means that Rb could be replaced with
   1053 // Rc, where Rc-Rb belongs to [Min+1, Max+1].
   1054 OffsetRange HCE::getOffsetRange(Register Rb, const MachineInstr &MI) const {
   1055   unsigned Opc = MI.getOpcode();
   1056   // Instructions that are constant-extended may be replaced with something
   1057   // else that no longer offers the same range as the original.
   1058   if (!isRegOffOpcode(Opc) || HII->isConstExtended(MI))
   1059     return OffsetRange::zero();
   1060 
   1061   if (Opc == Hexagon::A2_addi) {
   1062     const MachineOperand &Op1 = MI.getOperand(1), &Op2 = MI.getOperand(2);
   1063     if (Rb != Register(Op1) || !Op2.isImm())
   1064       return OffsetRange::zero();
   1065     OffsetRange R = { -(1<<15)+1, (1<<15)-1, 1 };
   1066     return R.shift(Op2.getImm());
   1067   }
   1068 
   1069   // HII::getBaseAndOffsetPosition returns the increment position as "offset".
   1070   if (HII->isPostIncrement(MI))
   1071     return OffsetRange::zero();
   1072 
   1073   const MCInstrDesc &D = HII->get(Opc);
   1074   assert(D.mayLoad() || D.mayStore());
   1075 
   1076   unsigned BaseP, OffP;
   1077   if (!HII->getBaseAndOffsetPosition(MI, BaseP, OffP) ||
   1078       Rb != Register(MI.getOperand(BaseP)) ||
   1079       !MI.getOperand(OffP).isImm())
   1080     return OffsetRange::zero();
   1081 
   1082   uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) &
   1083                   HexagonII::MemAccesSizeMask;
   1084   uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F));
   1085   unsigned L = Log2_32(A);
   1086   unsigned S = 10+L;  // sint11_L
   1087   int32_t Min = -alignDown((1<<S)-1, A);
   1088 
   1089   // The range will be shifted by Off. To prefer non-negative offsets,
   1090   // adjust Max accordingly.
   1091   int32_t Off = MI.getOperand(OffP).getImm();
   1092   int32_t Max = Off >= 0 ? 0 : -Off;
   1093 
   1094   OffsetRange R = { Min, Max, A };
   1095   return R.shift(Off);
   1096 }
   1097 
   1098 // Return the allowable deviation from the current value of the extender ED,
   1099 // for which the instruction corresponding to ED can be modified without
   1100 // using an extender.
   1101 // The instruction uses the extender directly. It will be replaced with
   1102 // another instruction, say MJ, where the extender will be replaced with a
   1103 // register. MJ can allow some variability with respect to the value of
   1104 // that register, as is the case with indexed memory instructions.
   1105 OffsetRange HCE::getOffsetRange(const ExtDesc &ED) const {
   1106   // The only way that there can be a non-zero range available is if
   1107   // the instruction using ED will be converted to an indexed memory
   1108   // instruction.
   1109   unsigned IdxOpc = getRegOffOpcode(ED.UseMI->getOpcode());
   1110   switch (IdxOpc) {
   1111     case 0:
   1112       return OffsetRange::zero();
   1113     case Hexagon::A2_addi:    // s16
   1114       return { -32767, 32767, 1 };
   1115     case Hexagon::A2_subri:   // s10
   1116       return { -511, 511, 1 };
   1117   }
   1118 
   1119   if (!ED.UseMI->mayLoad() && !ED.UseMI->mayStore())
   1120     return OffsetRange::zero();
   1121   const MCInstrDesc &D = HII->get(IdxOpc);
   1122   uint64_t F = (D.TSFlags >> HexagonII::MemAccessSizePos) &
   1123                   HexagonII::MemAccesSizeMask;
   1124   uint8_t A = HexagonII::getMemAccessSizeInBytes(HexagonII::MemAccessSize(F));
   1125   unsigned L = Log2_32(A);
   1126   unsigned S = 10+L;  // sint11_L
   1127   int32_t Min = -alignDown((1<<S)-1, A);
   1128   int32_t Max = 0;  // Force non-negative offsets.
   1129   return { Min, Max, A };
   1130 }
   1131 
   1132 // Get the allowable deviation from the current value of Rd by checking
   1133 // all uses of Rd.
   1134 OffsetRange HCE::getOffsetRange(Register Rd) const {
   1135   OffsetRange Range;
   1136   for (const MachineOperand &Op : MRI->use_operands(Rd.Reg)) {
   1137     // Make sure that the register being used by this operand is identical
   1138     // to the register that was defined: using a different subregister
   1139     // precludes any non-trivial range.
   1140     if (Rd != Register(Op))
   1141       return OffsetRange::zero();
   1142     Range.intersect(getOffsetRange(Rd, *Op.getParent()));
   1143   }
   1144   return Range;
   1145 }
   1146 
   1147 void HCE::recordExtender(MachineInstr &MI, unsigned OpNum) {
   1148   unsigned Opc = MI.getOpcode();
   1149   ExtDesc ED;
   1150   ED.OpNum = OpNum;
   1151 
   1152   bool IsLoad = MI.mayLoad();
   1153   bool IsStore = MI.mayStore();
   1154 
   1155   // Fixed stack slots have negative indexes, and they cannot be used
   1156   // with TRI::stackSlot2Index and TRI::index2StackSlot. This is somewhat
   1157   // unfortunate, but should not be a frequent thing.
   1158   for (MachineOperand &Op : MI.operands())
   1159     if (Op.isFI() && Op.getIndex() < 0)
   1160       return;
   1161 
   1162   if (IsLoad || IsStore) {
   1163     unsigned AM = HII->getAddrMode(MI);
   1164     switch (AM) {
   1165       // (Re: ##Off + Rb<<S) = Rd: ##Val
   1166       case HexagonII::Absolute:       // (__: ## + __<<_)
   1167         break;
   1168       case HexagonII::AbsoluteSet:    // (Rd: ## + __<<_)
   1169         ED.Rd = MI.getOperand(OpNum-1);
   1170         ED.IsDef = true;
   1171         break;
   1172       case HexagonII::BaseImmOffset:  // (__: ## + Rs<<0)
   1173         // Store-immediates are treated as non-memory operations, since
   1174         // it's the value being stored that is extended (as opposed to
   1175         // a part of the address).
   1176         if (!isStoreImmediate(Opc))
   1177           ED.Expr.Rs = MI.getOperand(OpNum-1);
   1178         break;
   1179       case HexagonII::BaseLongOffset: // (__: ## + Rs<<S)
   1180         ED.Expr.Rs = MI.getOperand(OpNum-2);
   1181         ED.Expr.S = MI.getOperand(OpNum-1).getImm();
   1182         break;
   1183       default:
   1184         llvm_unreachable("Unhandled memory instruction");
   1185     }
   1186   } else {
   1187     switch (Opc) {
   1188       case Hexagon::A2_tfrsi:         // (Rd: ## + __<<_)
   1189         ED.Rd = MI.getOperand(0);
   1190         ED.IsDef = true;
   1191         break;
   1192       case Hexagon::A2_combineii:     // (Rd: ## + __<<_)
   1193       case Hexagon::A4_combineir:
   1194         ED.Rd = { MI.getOperand(0).getReg(), Hexagon::isub_hi };
   1195         ED.IsDef = true;
   1196         break;
   1197       case Hexagon::A4_combineri:     // (Rd: ## + __<<_)
   1198         ED.Rd = { MI.getOperand(0).getReg(), Hexagon::isub_lo };
   1199         ED.IsDef = true;
   1200         break;
   1201       case Hexagon::A2_addi:          // (Rd: ## + Rs<<0)
   1202         ED.Rd = MI.getOperand(0);
   1203         ED.Expr.Rs = MI.getOperand(OpNum-1);
   1204         break;
   1205       case Hexagon::M2_accii:         // (__: ## + Rs<<0)
   1206       case Hexagon::M2_naccii:
   1207       case Hexagon::S4_addaddi:
   1208         ED.Expr.Rs = MI.getOperand(OpNum-1);
   1209         break;
   1210       case Hexagon::A2_subri:         // (Rd: ## - Rs<<0)
   1211         ED.Rd = MI.getOperand(0);
   1212         ED.Expr.Rs = MI.getOperand(OpNum+1);
   1213         ED.Expr.Neg = true;
   1214         break;
   1215       case Hexagon::S4_subaddi:       // (__: ## - Rs<<0)
   1216         ED.Expr.Rs = MI.getOperand(OpNum+1);
   1217         ED.Expr.Neg = true;
   1218         break;
   1219       default:                        // (__: ## + __<<_)
   1220         break;
   1221     }
   1222   }
   1223 
   1224   ED.UseMI = &MI;
   1225   Extenders.push_back(ED);
   1226 }
   1227 
   1228 void HCE::collectInstr(MachineInstr &MI) {
   1229   if (!HII->isConstExtended(MI))
   1230     return;
   1231 
   1232   // Skip some non-convertible instructions.
   1233   unsigned Opc = MI.getOpcode();
   1234   switch (Opc) {
   1235     case Hexagon::M2_macsin:  // There is no Rx -= mpyi(Rs,Rt).
   1236     case Hexagon::C4_addipc:
   1237     case Hexagon::S4_or_andi:
   1238     case Hexagon::S4_or_andix:
   1239     case Hexagon::S4_or_ori:
   1240       return;
   1241   }
   1242   recordExtender(MI, HII->getCExtOpNum(MI));
   1243 }
   1244 
   1245 void HCE::collect(MachineFunction &MF) {
   1246   Extenders.clear();
   1247   for (MachineBasicBlock &MBB : MF)
   1248     for (MachineInstr &MI : MBB)
   1249       collectInstr(MI);
   1250 }
   1251 
   1252 void HCE::assignInits(const ExtRoot &ER, unsigned Begin, unsigned End,
   1253       AssignmentMap &IMap) {
   1254   // Sanity check: make sure that all extenders in the range [Begin..End)
   1255   // share the same root ER.
   1256   for (unsigned I = Begin; I != End; ++I)
   1257     assert(ER == ExtRoot(Extenders[I].getOp()));
   1258 
   1259   // Construct the list of ranges, such that for each P in Ranges[I],
   1260   // a register Reg = ER+P can be used in place of Extender[I]. If the
   1261   // instruction allows, uses in the form of Reg+Off are considered
   1262   // (here, Off = required_value - P).
   1263   std::vector<OffsetRange> Ranges(End-Begin);
   1264 
   1265   // For each extender that is a def, visit all uses of the defined register,
   1266   // and produce an offset range that works for all uses. The def doesn't
   1267   // have to be checked, because it can become dead if all uses can be updated
   1268   // to use a different reg/offset.
   1269   for (unsigned I = Begin; I != End; ++I) {
   1270     const ExtDesc &ED = Extenders[I];
   1271     if (!ED.IsDef)
   1272       continue;
   1273     ExtValue EV(ED);
   1274     LLVM_DEBUG(dbgs() << " =" << I << ". " << EV << "  " << ED << '\n');
   1275     assert(ED.Rd.Reg != 0);
   1276     Ranges[I-Begin] = getOffsetRange(ED.Rd).shift(EV.Offset);
   1277     // A2_tfrsi is a special case: it will be replaced with A2_addi, which
   1278     // has a 16-bit signed offset. This means that A2_tfrsi not only has a
   1279     // range coming from its uses, but also from the fact that its replacement
   1280     // has a range as well.
   1281     if (ED.UseMI->getOpcode() == Hexagon::A2_tfrsi) {
   1282       int32_t D = alignDown(32767, Ranges[I-Begin].Align); // XXX hardcoded
   1283       Ranges[I-Begin].extendBy(-D).extendBy(D);
   1284     }
   1285   }
   1286 
   1287   // Visit all non-def extenders. For each one, determine the offset range
   1288   // available for it.
   1289   for (unsigned I = Begin; I != End; ++I) {
   1290     const ExtDesc &ED = Extenders[I];
   1291     if (ED.IsDef)
   1292       continue;
   1293     ExtValue EV(ED);
   1294     LLVM_DEBUG(dbgs() << "  " << I << ". " << EV << "  " << ED << '\n');
   1295     OffsetRange Dev = getOffsetRange(ED);
   1296     Ranges[I-Begin].intersect(Dev.shift(EV.Offset));
   1297   }
   1298 
   1299   // Here for each I there is a corresponding Range[I]. Construct the
   1300   // inverse map, that to each range will assign the set of indexes in
   1301   // [Begin..End) that this range corresponds to.
   1302   std::map<OffsetRange, IndexList> RangeMap;
   1303   for (unsigned I = Begin; I != End; ++I)
   1304     RangeMap[Ranges[I-Begin]].insert(I);
   1305 
   1306   LLVM_DEBUG({
   1307     dbgs() << "Ranges\n";
   1308     for (unsigned I = Begin; I != End; ++I)
   1309       dbgs() << "  " << I << ". " << Ranges[I-Begin] << '\n';
   1310     dbgs() << "RangeMap\n";
   1311     for (auto &P : RangeMap) {
   1312       dbgs() << "  " << P.first << " ->";
   1313       for (unsigned I : P.second)
   1314         dbgs() << ' ' << I;
   1315       dbgs() << '\n';
   1316     }
   1317   });
   1318 
   1319   // Select the definition points, and generate the assignment between
   1320   // these points and the uses.
   1321 
   1322   // For each candidate offset, keep a pair CandData consisting of
   1323   // the total number of ranges containing that candidate, and the
   1324   // vector of corresponding RangeTree nodes.
   1325   using CandData = std::pair<unsigned, SmallVector<RangeTree::Node*,8>>;
   1326   std::map<int32_t, CandData> CandMap;
   1327 
   1328   RangeTree Tree;
   1329   for (const OffsetRange &R : Ranges)
   1330     Tree.add(R);
   1331   SmallVector<RangeTree::Node*,8> Nodes;
   1332   Tree.order(Nodes);
   1333 
   1334   auto MaxAlign = [](const SmallVectorImpl<RangeTree::Node*> &Nodes,
   1335                      uint8_t Align, uint8_t Offset) {
   1336     for (RangeTree::Node *N : Nodes) {
   1337       if (N->Range.Align <= Align || N->Range.Offset < Offset)
   1338         continue;
   1339       if ((N->Range.Offset - Offset) % Align != 0)
   1340         continue;
   1341       Align = N->Range.Align;
   1342       Offset = N->Range.Offset;
   1343     }
   1344     return std::make_pair(Align, Offset);
   1345   };
   1346 
   1347   // Construct the set of all potential definition points from the endpoints
   1348   // of the ranges. If a given endpoint also belongs to a different range,
   1349   // but with a higher alignment, also consider the more-highly-aligned
   1350   // value of this endpoint.
   1351   std::set<int32_t> CandSet;
   1352   for (RangeTree::Node *N : Nodes) {
   1353     const OffsetRange &R = N->Range;
   1354     auto P0 = MaxAlign(Tree.nodesWith(R.Min, false), R.Align, R.Offset);
   1355     CandSet.insert(R.Min);
   1356     if (R.Align < P0.first)
   1357       CandSet.insert(adjustUp(R.Min, P0.first, P0.second));
   1358     auto P1 = MaxAlign(Tree.nodesWith(R.Max, false), R.Align, R.Offset);
   1359     CandSet.insert(R.Max);
   1360     if (R.Align < P1.first)
   1361       CandSet.insert(adjustDown(R.Max, P1.first, P1.second));
   1362   }
   1363 
   1364   // Build the assignment map: candidate C -> { list of extender indexes }.
   1365   // This has to be done iteratively:
   1366   // - pick the candidate that covers the maximum number of extenders,
   1367   // - add the candidate to the map,
   1368   // - remove the extenders from the pool.
   1369   while (true) {
   1370     using CMap = std::map<int32_t,unsigned>;
   1371     CMap Counts;
   1372     for (auto It = CandSet.begin(), Et = CandSet.end(); It != Et; ) {
   1373       auto &&V = Tree.nodesWith(*It);
   1374       unsigned N = std::accumulate(V.begin(), V.end(), 0u,
   1375                     [](unsigned Acc, const RangeTree::Node *N) {
   1376                       return Acc + N->Count;
   1377                     });
   1378       if (N != 0)
   1379         Counts.insert({*It, N});
   1380       It = (N != 0) ? std::next(It) : CandSet.erase(It);
   1381     }
   1382     if (Counts.empty())
   1383       break;
   1384 
   1385     // Find the best candidate with respect to the number of extenders covered.
   1386     auto BestIt = std::max_element(Counts.begin(), Counts.end(),
   1387                     [](const CMap::value_type &A, const CMap::value_type &B) {
   1388                       return A.second < B.second ||
   1389                              (A.second == B.second && A < B);
   1390                     });
   1391     int32_t Best = BestIt->first;
   1392     ExtValue BestV(ER, Best);
   1393     for (RangeTree::Node *N : Tree.nodesWith(Best)) {
   1394       for (unsigned I : RangeMap[N->Range])
   1395         IMap[{BestV,Extenders[I].Expr}].insert(I);
   1396       Tree.erase(N);
   1397     }
   1398   }
   1399 
   1400   LLVM_DEBUG(dbgs() << "IMap (before fixup) = " << PrintIMap(IMap, *HRI));
   1401 
   1402   // There is some ambiguity in what initializer should be used, if the
   1403   // descriptor's subexpression is non-trivial: it can be the entire
   1404   // subexpression (which is what has been done so far), or it can be
   1405   // the extender's value itself, if all corresponding extenders have the
   1406   // exact value of the initializer (i.e. require offset of 0).
   1407 
   1408   // To reduce the number of initializers, merge such special cases.
   1409   for (std::pair<const ExtenderInit,IndexList> &P : IMap) {
   1410     // Skip trivial initializers.
   1411     if (P.first.second.trivial())
   1412       continue;
   1413     // If the corresponding trivial initializer does not exist, skip this
   1414     // entry.
   1415     const ExtValue &EV = P.first.first;
   1416     AssignmentMap::iterator F = IMap.find({EV, ExtExpr()});
   1417     if (F == IMap.end())
   1418       continue;
   1419 
   1420     // Finally, check if all extenders have the same value as the initializer.
   1421     // Make sure that extenders that are a part of a stack address are not
   1422     // merged with those that aren't. Stack addresses need an offset field
   1423     // (to be used by frame index elimination), while non-stack expressions
   1424     // can be replaced with forms (such as rr) that do not have such a field.
   1425     // Example:
   1426     //
   1427     // Collected 3 extenders
   1428     //  =2. imm:0  off:32968  bb#2: %7 = ## + __ << 0, def
   1429     //   0. imm:0  off:267  bb#0: __ = ## + SS#1 << 0
   1430     //   1. imm:0  off:267  bb#1: __ = ## + SS#1 << 0
   1431     // Ranges
   1432     //   0. [-756,267]a1+0
   1433     //   1. [-756,267]a1+0
   1434     //   2. [201,65735]a1+0
   1435     // RangeMap
   1436     //   [-756,267]a1+0 -> 0 1
   1437     //   [201,65735]a1+0 -> 2
   1438     // IMap (before fixup) = {
   1439     //   [imm:0  off:267, ## + __ << 0] -> { 2 }
   1440     //   [imm:0  off:267, ## + SS#1 << 0] -> { 0 1 }
   1441     // }
   1442     // IMap (after fixup) = {
   1443     //   [imm:0  off:267, ## + __ << 0] -> { 2 0 1 }
   1444     //   [imm:0  off:267, ## + SS#1 << 0] -> { }
   1445     // }
   1446     // Inserted def in bb#0 for initializer: [imm:0  off:267, ## + __ << 0]
   1447     //   %12:intregs = A2_tfrsi 267
   1448     //
   1449     // The result was
   1450     //   %12:intregs = A2_tfrsi 267
   1451     //   S4_pstorerbt_rr %3, %12, %stack.1, 0, killed %4
   1452     // Which became
   1453     //   r0 = #267
   1454     //   if (p0.new) memb(r0+r29<<#4) = r2
   1455 
   1456     bool IsStack = any_of(F->second, [this](unsigned I) {
   1457                       return Extenders[I].Expr.Rs.isSlot();
   1458                    });
   1459     auto SameValue = [&EV,this,IsStack](unsigned I) {
   1460       const ExtDesc &ED = Extenders[I];
   1461       return ED.Expr.Rs.isSlot() == IsStack &&
   1462              ExtValue(ED).Offset == EV.Offset;
   1463     };
   1464     if (all_of(P.second, SameValue)) {
   1465       F->second.insert(P.second.begin(), P.second.end());
   1466       P.second.clear();
   1467     }
   1468   }
   1469 
   1470   LLVM_DEBUG(dbgs() << "IMap (after fixup) = " << PrintIMap(IMap, *HRI));
   1471 }
   1472 
   1473 void HCE::calculatePlacement(const ExtenderInit &ExtI, const IndexList &Refs,
   1474       LocDefMap &Defs) {
   1475   if (Refs.empty())
   1476     return;
   1477 
   1478   // The placement calculation is somewhat simple right now: it finds a
   1479   // single location for the def that dominates all refs. Since this may
   1480   // place the def far from the uses, producing several locations for
   1481   // defs that collectively dominate all refs could be better.
   1482   // For now only do the single one.
   1483   DenseSet<MachineBasicBlock*> Blocks;
   1484   DenseSet<MachineInstr*> RefMIs;
   1485   const ExtDesc &ED0 = Extenders[Refs[0]];
   1486   MachineBasicBlock *DomB = ED0.UseMI->getParent();
   1487   RefMIs.insert(ED0.UseMI);
   1488   Blocks.insert(DomB);
   1489   for (unsigned i = 1, e = Refs.size(); i != e; ++i) {
   1490     const ExtDesc &ED = Extenders[Refs[i]];
   1491     MachineBasicBlock *MBB = ED.UseMI->getParent();
   1492     RefMIs.insert(ED.UseMI);
   1493     DomB = MDT->findNearestCommonDominator(DomB, MBB);
   1494     Blocks.insert(MBB);
   1495   }
   1496 
   1497 #ifndef NDEBUG
   1498   // The block DomB should be dominated by the def of each register used
   1499   // in the initializer.
   1500   Register Rs = ExtI.second.Rs;  // Only one reg allowed now.
   1501   const MachineInstr *DefI = Rs.isVReg() ? MRI->getVRegDef(Rs.Reg) : nullptr;
   1502 
   1503   // This should be guaranteed given that the entire expression is used
   1504   // at each instruction in Refs. Add an assertion just in case.
   1505   assert(!DefI || MDT->dominates(DefI->getParent(), DomB));
   1506 #endif
   1507 
   1508   MachineBasicBlock::iterator It;
   1509   if (Blocks.count(DomB)) {
   1510     // Try to find the latest possible location for the def.
   1511     MachineBasicBlock::iterator End = DomB->end();
   1512     for (It = DomB->begin(); It != End; ++It)
   1513       if (RefMIs.count(&*It))
   1514         break;
   1515     assert(It != End && "Should have found a ref in DomB");
   1516   } else {
   1517     // DomB does not contain any refs.
   1518     It = DomB->getFirstTerminator();
   1519   }
   1520   Loc DefLoc(DomB, It);
   1521   Defs.emplace(DefLoc, Refs);
   1522 }
   1523 
   1524 HCE::Register HCE::insertInitializer(Loc DefL, const ExtenderInit &ExtI) {
   1525   unsigned DefR = MRI->createVirtualRegister(&Hexagon::IntRegsRegClass);
   1526   MachineBasicBlock &MBB = *DefL.Block;
   1527   MachineBasicBlock::iterator At = DefL.At;
   1528   DebugLoc dl = DefL.Block->findDebugLoc(DefL.At);
   1529   const ExtValue &EV = ExtI.first;
   1530   MachineOperand ExtOp(EV);
   1531 
   1532   const ExtExpr &Ex = ExtI.second;
   1533   const MachineInstr *InitI = nullptr;
   1534 
   1535   if (Ex.Rs.isSlot()) {
   1536     assert(Ex.S == 0 && "Cannot have a shift of a stack slot");
   1537     assert(!Ex.Neg && "Cannot subtract a stack slot");
   1538     // DefR = PS_fi Rb,##EV
   1539     InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::PS_fi), DefR)
   1540               .add(MachineOperand(Ex.Rs))
   1541               .add(ExtOp);
   1542   } else {
   1543     assert((Ex.Rs.Reg == 0 || Ex.Rs.isVReg()) && "Expecting virtual register");
   1544     if (Ex.trivial()) {
   1545       // DefR = ##EV
   1546       InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_tfrsi), DefR)
   1547                 .add(ExtOp);
   1548     } else if (Ex.S == 0) {
   1549       if (Ex.Neg) {
   1550         // DefR = sub(##EV,Rb)
   1551         InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_subri), DefR)
   1552                   .add(ExtOp)
   1553                   .add(MachineOperand(Ex.Rs));
   1554       } else {
   1555         // DefR = add(Rb,##EV)
   1556         InitI = BuildMI(MBB, At, dl, HII->get(Hexagon::A2_addi), DefR)
   1557                   .add(MachineOperand(Ex.Rs))
   1558                   .add(ExtOp);
   1559       }
   1560     } else {
   1561       unsigned NewOpc = Ex.Neg ? Hexagon::S4_subi_asl_ri
   1562                                : Hexagon::S4_addi_asl_ri;
   1563       // DefR = add(##EV,asl(Rb,S))
   1564       InitI = BuildMI(MBB, At, dl, HII->get(NewOpc), DefR)
   1565                 .add(ExtOp)
   1566                 .add(MachineOperand(Ex.Rs))
   1567                 .addImm(Ex.S);
   1568     }
   1569   }
   1570 
   1571   assert(InitI);
   1572   (void)InitI;
   1573   LLVM_DEBUG(dbgs() << "Inserted def in bb#" << MBB.getNumber()
   1574                     << " for initializer: " << PrintInit(ExtI, *HRI) << "\n  "
   1575                     << *InitI);
   1576   return { DefR, 0 };
   1577 }
   1578 
   1579 // Replace the extender at index Idx with the register ExtR.
   1580 bool HCE::replaceInstrExact(const ExtDesc &ED, Register ExtR) {
   1581   MachineInstr &MI = *ED.UseMI;
   1582   MachineBasicBlock &MBB = *MI.getParent();
   1583   MachineBasicBlock::iterator At = MI.getIterator();
   1584   DebugLoc dl = MI.getDebugLoc();
   1585   unsigned ExtOpc = MI.getOpcode();
   1586 
   1587   // With a few exceptions, direct replacement amounts to creating an
   1588   // instruction with a corresponding register opcode, with all operands
   1589   // the same, except for the register used in place of the extender.
   1590   unsigned RegOpc = getDirectRegReplacement(ExtOpc);
   1591 
   1592   if (RegOpc == TargetOpcode::REG_SEQUENCE) {
   1593     if (ExtOpc == Hexagon::A4_combineri)
   1594       BuildMI(MBB, At, dl, HII->get(RegOpc))
   1595         .add(MI.getOperand(0))
   1596         .add(MI.getOperand(1))
   1597         .addImm(Hexagon::isub_hi)
   1598         .add(MachineOperand(ExtR))
   1599         .addImm(Hexagon::isub_lo);
   1600     else if (ExtOpc == Hexagon::A4_combineir)
   1601       BuildMI(MBB, At, dl, HII->get(RegOpc))
   1602         .add(MI.getOperand(0))
   1603         .add(MachineOperand(ExtR))
   1604         .addImm(Hexagon::isub_hi)
   1605         .add(MI.getOperand(2))
   1606         .addImm(Hexagon::isub_lo);
   1607     else
   1608       llvm_unreachable("Unexpected opcode became REG_SEQUENCE");
   1609     MBB.erase(MI);
   1610     return true;
   1611   }
   1612   if (ExtOpc == Hexagon::C2_cmpgei || ExtOpc == Hexagon::C2_cmpgeui) {
   1613     unsigned NewOpc = ExtOpc == Hexagon::C2_cmpgei ? Hexagon::C2_cmplt
   1614                                                    : Hexagon::C2_cmpltu;
   1615     BuildMI(MBB, At, dl, HII->get(NewOpc))
   1616       .add(MI.getOperand(0))
   1617       .add(MachineOperand(ExtR))
   1618       .add(MI.getOperand(1));
   1619     MBB.erase(MI);
   1620     return true;
   1621   }
   1622 
   1623   if (RegOpc != 0) {
   1624     MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc));
   1625     unsigned RegN = ED.OpNum;
   1626     // Copy all operands except the one that has the extender.
   1627     for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
   1628       if (i != RegN)
   1629         MIB.add(MI.getOperand(i));
   1630       else
   1631         MIB.add(MachineOperand(ExtR));
   1632     }
   1633     MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
   1634     MBB.erase(MI);
   1635     return true;
   1636   }
   1637 
   1638   if ((MI.mayLoad() || MI.mayStore()) && !isStoreImmediate(ExtOpc)) {
   1639     // For memory instructions, there is an asymmetry in the addressing
   1640     // modes. Addressing modes allowing extenders can be replaced with
   1641     // addressing modes that use registers, but the order of operands
   1642     // (or even their number) may be different.
   1643     // Replacements:
   1644     //   BaseImmOffset (io)  -> BaseRegOffset (rr)
   1645     //   BaseLongOffset (ur) -> BaseRegOffset (rr)
   1646     unsigned RegOpc, Shift;
   1647     unsigned AM = HII->getAddrMode(MI);
   1648     if (AM == HexagonII::BaseImmOffset) {
   1649       RegOpc = HII->changeAddrMode_io_rr(ExtOpc);
   1650       Shift = 0;
   1651     } else if (AM == HexagonII::BaseLongOffset) {
   1652       // Loads:  Rd = L4_loadri_ur Rs, S, ##
   1653       // Stores: S4_storeri_ur Rs, S, ##, Rt
   1654       RegOpc = HII->changeAddrMode_ur_rr(ExtOpc);
   1655       Shift = MI.getOperand(MI.mayLoad() ? 2 : 1).getImm();
   1656     } else {
   1657       llvm_unreachable("Unexpected addressing mode");
   1658     }
   1659 #ifndef NDEBUG
   1660     if (RegOpc == -1u) {
   1661       dbgs() << "\nExtOpc: " << HII->getName(ExtOpc) << " has no rr version\n";
   1662       llvm_unreachable("No corresponding rr instruction");
   1663     }
   1664 #endif
   1665 
   1666     unsigned BaseP, OffP;
   1667     HII->getBaseAndOffsetPosition(MI, BaseP, OffP);
   1668 
   1669     // Build an rr instruction: (RegOff + RegBase<<0)
   1670     MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(RegOpc));
   1671     // First, add the def for loads.
   1672     if (MI.mayLoad())
   1673       MIB.add(getLoadResultOp(MI));
   1674     // Handle possible predication.
   1675     if (HII->isPredicated(MI))
   1676       MIB.add(getPredicateOp(MI));
   1677     // Build the address.
   1678     MIB.add(MachineOperand(ExtR));      // RegOff
   1679     MIB.add(MI.getOperand(BaseP));      // RegBase
   1680     MIB.addImm(Shift);                  // << Shift
   1681     // Add the stored value for stores.
   1682     if (MI.mayStore())
   1683       MIB.add(getStoredValueOp(MI));
   1684     MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
   1685     MBB.erase(MI);
   1686     return true;
   1687   }
   1688 
   1689 #ifndef NDEBUG
   1690   dbgs() << '\n' << MI;
   1691 #endif
   1692   llvm_unreachable("Unhandled exact replacement");
   1693   return false;
   1694 }
   1695 
   1696 // Replace the extender ED with a form corresponding to the initializer ExtI.
   1697 bool HCE::replaceInstrExpr(const ExtDesc &ED, const ExtenderInit &ExtI,
   1698       Register ExtR, int32_t &Diff) {
   1699   MachineInstr &MI = *ED.UseMI;
   1700   MachineBasicBlock &MBB = *MI.getParent();
   1701   MachineBasicBlock::iterator At = MI.getIterator();
   1702   DebugLoc dl = MI.getDebugLoc();
   1703   unsigned ExtOpc = MI.getOpcode();
   1704 
   1705   if (ExtOpc == Hexagon::A2_tfrsi) {
   1706     // A2_tfrsi is a special case: it's replaced with A2_addi, which introduces
   1707     // another range. One range is the one that's common to all tfrsi's uses,
   1708     // this one is the range of immediates in A2_addi. When calculating ranges,
   1709     // the addi's 16-bit argument was included, so now we need to make it such
   1710     // that the produced value is in the range for the uses alone.
   1711     // Most of the time, simply adding Diff will make the addi produce exact
   1712     // result, but if Diff is outside of the 16-bit range, some adjustment
   1713     // will be needed.
   1714     unsigned IdxOpc = getRegOffOpcode(ExtOpc);
   1715     assert(IdxOpc == Hexagon::A2_addi);
   1716 
   1717     // Clamp Diff to the 16 bit range.
   1718     int32_t D = isInt<16>(Diff) ? Diff : (Diff > 0 ? 32767 : -32768);
   1719     BuildMI(MBB, At, dl, HII->get(IdxOpc))
   1720       .add(MI.getOperand(0))
   1721       .add(MachineOperand(ExtR))
   1722       .addImm(D);
   1723     Diff -= D;
   1724 #ifndef NDEBUG
   1725     // Make sure the output is within allowable range for uses.
   1726     // "Diff" is a difference in the "opposite direction", i.e. Ext - DefV,
   1727     // not DefV - Ext, as the getOffsetRange would calculate.
   1728     OffsetRange Uses = getOffsetRange(MI.getOperand(0));
   1729     if (!Uses.contains(-Diff))
   1730       dbgs() << "Diff: " << -Diff << " out of range " << Uses
   1731              << " for " << MI;
   1732     assert(Uses.contains(-Diff));
   1733 #endif
   1734     MBB.erase(MI);
   1735     return true;
   1736   }
   1737 
   1738   const ExtValue &EV = ExtI.first; (void)EV;
   1739   const ExtExpr &Ex = ExtI.second; (void)Ex;
   1740 
   1741   if (ExtOpc == Hexagon::A2_addi || ExtOpc == Hexagon::A2_subri) {
   1742     // If addi/subri are replaced with the exactly matching initializer,
   1743     // they amount to COPY.
   1744     // Check that the initializer is an exact match (for simplicity).
   1745 #ifndef NDEBUG
   1746     bool IsAddi = ExtOpc == Hexagon::A2_addi;
   1747     const MachineOperand &RegOp = MI.getOperand(IsAddi ? 1 : 2);
   1748     const MachineOperand &ImmOp = MI.getOperand(IsAddi ? 2 : 1);
   1749     assert(Ex.Rs == RegOp && EV == ImmOp && Ex.Neg != IsAddi &&
   1750            "Initializer mismatch");
   1751 #endif
   1752     BuildMI(MBB, At, dl, HII->get(TargetOpcode::COPY))
   1753       .add(MI.getOperand(0))
   1754       .add(MachineOperand(ExtR));
   1755     Diff = 0;
   1756     MBB.erase(MI);
   1757     return true;
   1758   }
   1759   if (ExtOpc == Hexagon::M2_accii || ExtOpc == Hexagon::M2_naccii ||
   1760       ExtOpc == Hexagon::S4_addaddi || ExtOpc == Hexagon::S4_subaddi) {
   1761     // M2_accii:    add(Rt,add(Rs,V)) (tied)
   1762     // M2_naccii:   sub(Rt,add(Rs,V))
   1763     // S4_addaddi:  add(Rt,add(Rs,V))
   1764     // S4_subaddi:  add(Rt,sub(V,Rs))
   1765     // Check that Rs and V match the initializer expression. The Rs+V is the
   1766     // combination that is considered "subexpression" for V, although Rx+V
   1767     // would also be valid.
   1768 #ifndef NDEBUG
   1769     bool IsSub = ExtOpc == Hexagon::S4_subaddi;
   1770     Register Rs = MI.getOperand(IsSub ? 3 : 2);
   1771     ExtValue V = MI.getOperand(IsSub ? 2 : 3);
   1772     assert(EV == V && Rs == Ex.Rs && IsSub == Ex.Neg && "Initializer mismatch");
   1773 #endif
   1774     unsigned NewOpc = ExtOpc == Hexagon::M2_naccii ? Hexagon::A2_sub
   1775                                                    : Hexagon::A2_add;
   1776     BuildMI(MBB, At, dl, HII->get(NewOpc))
   1777       .add(MI.getOperand(0))
   1778       .add(MI.getOperand(1))
   1779       .add(MachineOperand(ExtR));
   1780     MBB.erase(MI);
   1781     return true;
   1782   }
   1783 
   1784   if (MI.mayLoad() || MI.mayStore()) {
   1785     unsigned IdxOpc = getRegOffOpcode(ExtOpc);
   1786     assert(IdxOpc && "Expecting indexed opcode");
   1787     MachineInstrBuilder MIB = BuildMI(MBB, At, dl, HII->get(IdxOpc));
   1788     // Construct the new indexed instruction.
   1789     // First, add the def for loads.
   1790     if (MI.mayLoad())
   1791       MIB.add(getLoadResultOp(MI));
   1792     // Handle possible predication.
   1793     if (HII->isPredicated(MI))
   1794       MIB.add(getPredicateOp(MI));
   1795     // Build the address.
   1796     MIB.add(MachineOperand(ExtR));
   1797     MIB.addImm(Diff);
   1798     // Add the stored value for stores.
   1799     if (MI.mayStore())
   1800       MIB.add(getStoredValueOp(MI));
   1801     MIB.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
   1802     MBB.erase(MI);
   1803     return true;
   1804   }
   1805 
   1806 #ifndef NDEBUG
   1807   dbgs() << '\n' << PrintInit(ExtI, *HRI) << "  " << MI;
   1808 #endif
   1809   llvm_unreachable("Unhandled expr replacement");
   1810   return false;
   1811 }
   1812 
   1813 bool HCE::replaceInstr(unsigned Idx, Register ExtR, const ExtenderInit &ExtI) {
   1814   if (ReplaceLimit.getNumOccurrences()) {
   1815     if (ReplaceLimit <= ReplaceCounter)
   1816       return false;
   1817     ++ReplaceCounter;
   1818   }
   1819   const ExtDesc &ED = Extenders[Idx];
   1820   assert((!ED.IsDef || ED.Rd.Reg != 0) && "Missing Rd for def");
   1821   const ExtValue &DefV = ExtI.first;
   1822   assert(ExtRoot(ExtValue(ED)) == ExtRoot(DefV) && "Extender root mismatch");
   1823   const ExtExpr &DefEx = ExtI.second;
   1824 
   1825   ExtValue EV(ED);
   1826   int32_t Diff = EV.Offset - DefV.Offset;
   1827   const MachineInstr &MI = *ED.UseMI;
   1828   LLVM_DEBUG(dbgs() << __func__ << " Idx:" << Idx << " ExtR:"
   1829                     << PrintRegister(ExtR, *HRI) << " Diff:" << Diff << '\n');
   1830 
   1831   // These two addressing modes must be converted into indexed forms
   1832   // regardless of what the initializer looks like.
   1833   bool IsAbs = false, IsAbsSet = false;
   1834   if (MI.mayLoad() || MI.mayStore()) {
   1835     unsigned AM = HII->getAddrMode(MI);
   1836     IsAbs = AM == HexagonII::Absolute;
   1837     IsAbsSet = AM == HexagonII::AbsoluteSet;
   1838   }
   1839 
   1840   // If it's a def, remember all operands that need to be updated.
   1841   // If ED is a def, and Diff is not 0, then all uses of the register Rd
   1842   // defined by ED must be in the form (Rd, imm), i.e. the immediate offset
   1843   // must follow the Rd in the operand list.
   1844   std::vector<std::pair<MachineInstr*,unsigned>> RegOps;
   1845   if (ED.IsDef && Diff != 0) {
   1846     for (MachineOperand &Op : MRI->use_operands(ED.Rd.Reg)) {
   1847       MachineInstr &UI = *Op.getParent();
   1848       RegOps.push_back({&UI, getOperandIndex(UI, Op)});
   1849     }
   1850   }
   1851 
   1852   // Replace the instruction.
   1853   bool Replaced = false;
   1854   if (Diff == 0 && DefEx.trivial() && !IsAbs && !IsAbsSet)
   1855     Replaced = replaceInstrExact(ED, ExtR);
   1856   else
   1857     Replaced = replaceInstrExpr(ED, ExtI, ExtR, Diff);
   1858 
   1859   if (Diff != 0 && Replaced && ED.IsDef) {
   1860     // Update offsets of the def's uses.
   1861     for (std::pair<MachineInstr*,unsigned> P : RegOps) {
   1862       unsigned J = P.second;
   1863       assert(P.first->getNumOperands() > J+1 &&
   1864              P.first->getOperand(J+1).isImm());
   1865       MachineOperand &ImmOp = P.first->getOperand(J+1);
   1866       ImmOp.setImm(ImmOp.getImm() + Diff);
   1867     }
   1868     // If it was an absolute-set instruction, the "set" part has been removed.
   1869     // ExtR will now be the register with the extended value, and since all
   1870     // users of Rd have been updated, all that needs to be done is to replace
   1871     // Rd with ExtR.
   1872     if (IsAbsSet) {
   1873       assert(ED.Rd.Sub == 0 && ExtR.Sub == 0);
   1874       MRI->replaceRegWith(ED.Rd.Reg, ExtR.Reg);
   1875     }
   1876   }
   1877 
   1878   return Replaced;
   1879 }
   1880 
   1881 bool HCE::replaceExtenders(const AssignmentMap &IMap) {
   1882   LocDefMap Defs;
   1883   bool Changed = false;
   1884 
   1885   for (const std::pair<ExtenderInit,IndexList> &P : IMap) {
   1886     const IndexList &Idxs = P.second;
   1887     if (Idxs.size() < CountThreshold)
   1888       continue;
   1889 
   1890     Defs.clear();
   1891     calculatePlacement(P.first, Idxs, Defs);
   1892     for (const std::pair<Loc,IndexList> &Q : Defs) {
   1893       Register DefR = insertInitializer(Q.first, P.first);
   1894       NewRegs.push_back(DefR.Reg);
   1895       for (unsigned I : Q.second)
   1896         Changed |= replaceInstr(I, DefR, P.first);
   1897     }
   1898   }
   1899   return Changed;
   1900 }
   1901 
   1902 unsigned HCE::getOperandIndex(const MachineInstr &MI,
   1903       const MachineOperand &Op) const {
   1904   for (unsigned i = 0, n = MI.getNumOperands(); i != n; ++i)
   1905     if (&MI.getOperand(i) == &Op)
   1906       return i;
   1907   llvm_unreachable("Not an operand of MI");
   1908 }
   1909 
   1910 const MachineOperand &HCE::getPredicateOp(const MachineInstr &MI) const {
   1911   assert(HII->isPredicated(MI));
   1912   for (const MachineOperand &Op : MI.operands()) {
   1913     if (!Op.isReg() || !Op.isUse() ||
   1914         MRI->getRegClass(Op.getReg()) != &Hexagon::PredRegsRegClass)
   1915       continue;
   1916     assert(Op.getSubReg() == 0 && "Predicate register with a subregister");
   1917     return Op;
   1918   }
   1919   llvm_unreachable("Predicate operand not found");
   1920 }
   1921 
   1922 const MachineOperand &HCE::getLoadResultOp(const MachineInstr &MI) const {
   1923   assert(MI.mayLoad());
   1924   return MI.getOperand(0);
   1925 }
   1926 
   1927 const MachineOperand &HCE::getStoredValueOp(const MachineInstr &MI) const {
   1928   assert(MI.mayStore());
   1929   return MI.getOperand(MI.getNumExplicitOperands()-1);
   1930 }
   1931 
   1932 bool HCE::runOnMachineFunction(MachineFunction &MF) {
   1933   if (skipFunction(MF.getFunction()))
   1934     return false;
   1935   LLVM_DEBUG(MF.print(dbgs() << "Before " << getPassName() << '\n', nullptr));
   1936 
   1937   HII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
   1938   HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
   1939   MDT = &getAnalysis<MachineDominatorTree>();
   1940   MRI = &MF.getRegInfo();
   1941   AssignmentMap IMap;
   1942 
   1943   collect(MF);
   1944   llvm::sort(Extenders.begin(), Extenders.end(),
   1945     [](const ExtDesc &A, const ExtDesc &B) {
   1946       return ExtValue(A) < ExtValue(B);
   1947     });
   1948 
   1949   bool Changed = false;
   1950   LLVM_DEBUG(dbgs() << "Collected " << Extenders.size() << " extenders\n");
   1951   for (unsigned I = 0, E = Extenders.size(); I != E; ) {
   1952     unsigned B = I;
   1953     const ExtRoot &T = Extenders[B].getOp();
   1954     while (I != E && ExtRoot(Extenders[I].getOp()) == T)
   1955       ++I;
   1956 
   1957     IMap.clear();
   1958     assignInits(T, B, I, IMap);
   1959     Changed |= replaceExtenders(IMap);
   1960   }
   1961 
   1962   LLVM_DEBUG({
   1963     if (Changed)
   1964       MF.print(dbgs() << "After " << getPassName() << '\n', nullptr);
   1965     else
   1966       dbgs() << "No changes\n";
   1967   });
   1968   return Changed;
   1969 }
   1970 
   1971 FunctionPass *llvm::createHexagonConstExtenders() {
   1972   return new HexagonConstExtenders();
   1973 }
   1974