//===-- ARMConstantIslandPass.cpp - ARM constant islands ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that splits the constant pool up into 'islands'
// which are scattered throughout the function.  This is required due to the
// limited pc-relative displacements that ARM has.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "arm-cp-islands"

STATISTIC(NumCPEs,       "Number of constpool entries");
STATISTIC(NumSplit,      "Number of uncond branches inserted");
STATISTIC(NumCBrFixed,   "Number of cond branches fixed");
STATISTIC(NumUBrFixed,   "Number of uncond branches fixed");
STATISTIC(NumTBs,        "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ,        "Number of CBZ / CBNZ formed");
STATISTIC(NumJTMoved,    "Number of jump table destination blocks moved");
STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");


static cl::opt<bool>
AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
          cl::desc("Adjust basic block layout to better use TB[BH]"));

static cl::opt<unsigned>
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
          cl::desc("The maximum number of iterations allowed for convergence"));


/// UnknownPadding - Return the worst case padding that could result from
/// unknown offset bits.  This does not include alignment padding caused by
/// known offset bits.
///
/// @param LogAlign log2(alignment)
/// @param KnownBits Number of known low offset bits.
static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}
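// Worked example (illustrative only, not part of the pass logic): with
// LogAlign = 2 (4-byte alignment) and KnownBits = 0, the padding could be
// anywhere from 0 to (1 << 2) - (1 << 0) = 3 bytes, so 3 is returned as the
// worst case.  Once KnownBits >= LogAlign, the padding is fully determined
// by the known low bits and no worst-case allowance is needed.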

namespace {
  /// ARMConstantIslands - Due to limited PC-relative displacements, ARM
  /// requires constant pool entries to be scattered among the instructions
  /// inside a function.  To do this, it completely ignores the normal LLVM
  /// constant pool; instead, it places constants wherever it feels like with
  /// special instructions.
  ///
  /// The terminology used in this pass includes:
  ///   Islands - Clumps of constants placed in the function.
  ///   Water   - Potential places where an island could be formed.
  ///   CPE     - A constant pool entry that has been placed somewhere, which
  ///             tracks a list of users.
  class ARMConstantIslands : public MachineFunctionPass {
    /// BasicBlockInfo - Information about the offset and size of a single
    /// basic block.
    struct BasicBlockInfo {
      /// Offset - Distance from the beginning of the function to the beginning
      /// of this basic block.
      ///
      /// Offsets are computed assuming worst case padding before an aligned
      /// block. This means that subtracting basic block offsets always gives a
      /// conservative estimate of the real distance which may be smaller.
      ///
      /// Because worst case padding is used, the computed offset of an aligned
      /// block may not actually be aligned.
      unsigned Offset;

      /// Size - Size of the basic block in bytes.  If the block contains
      /// inline assembly, this is a worst case estimate.
      ///
      /// The size does not include any alignment padding whether from the
      /// beginning of the block, or from an aligned jump table at the end.
      unsigned Size;

      /// KnownBits - The number of low bits in Offset that are known to be
      /// exact.  The remaining bits of Offset are an upper bound.
      uint8_t KnownBits;

      /// Unalign - When non-zero, the block contains instructions (inline asm)
      /// of unknown size.  The real size may be smaller than Size bytes by a
      /// multiple of 1 << Unalign.
      uint8_t Unalign;

      /// PostAlign - When non-zero, the block terminator contains a .align
      /// directive, so the end of the block is aligned to 1 << PostAlign
      /// bytes.
      uint8_t PostAlign;

      BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
        PostAlign(0) {}

      /// Compute the number of known offset bits internally to this block.
      /// This number should be used to predict worst case padding when
      /// splitting the block.
      unsigned internalKnownBits() const {
        unsigned Bits = Unalign ? Unalign : KnownBits;
        // If the block size isn't a multiple of the known bits, assume the
        // worst case padding.
        if (Size & ((1u << Bits) - 1))
          Bits = countTrailingZeros(Size);
        return Bits;
      }
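      // Example (for illustration): a block whose Size is 6 with
      // KnownBits = 2 only guarantees one trailing zero bit, because
      // 6 & 3 != 0 forces the fallback to countTrailingZeros(6) == 1.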

      /// Compute the offset immediately following this block.  If LogAlign is
      /// specified, return the offset the successor block will get if it has
      /// this alignment.
      unsigned postOffset(unsigned LogAlign = 0) const {
        unsigned PO = Offset + Size;
        unsigned LA = std::max(unsigned(PostAlign), LogAlign);
        if (!LA)
          return PO;
        // Add alignment padding from the terminator.
        return PO + UnknownPadding(LA, internalKnownBits());
      }
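      // Example (for illustration): Offset = 0x100, Size = 6, PostAlign = 0
      // and a successor with LogAlign = 2 gives PO = 0x106; with one known
      // low bit (see internalKnownBits above), UnknownPadding(2, 1) = 2, so
      // the conservative postOffset is 0x108.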

      /// Compute the number of known low bits of postOffset.  If this block
      /// contains inline asm, the number of known bits drops to the
      /// instruction alignment.  An aligned terminator may increase the number
      /// of known bits.
      /// If LogAlign is given, also consider the alignment of the next block.
      unsigned postKnownBits(unsigned LogAlign = 0) const {
        return std::max(std::max(unsigned(PostAlign), LogAlign),
                        internalKnownBits());
      }
    };

    std::vector<BasicBlockInfo> BBInfo;

    /// WaterList - A sorted list of basic blocks where islands could be placed
    /// (i.e. blocks that don't fall through to the following block, due
    /// to a return, unreachable, or unconditional branch).
    std::vector<MachineBasicBlock*> WaterList;

    /// NewWaterList - The subset of WaterList that was created since the
    /// previous iteration by inserting unconditional branches.
    SmallSet<MachineBasicBlock*, 4> NewWaterList;

    typedef std::vector<MachineBasicBlock*>::iterator water_iterator;

    /// CPUser - One user of a constant pool, keeping the machine instruction
    /// pointer, the constant pool being referenced, and the max displacement
    /// allowed from the instruction to the CP.  The HighWaterMark records the
    /// highest basic block where a new CPEntry can be placed.  To ensure this
    /// pass terminates, the CP entries are initially placed at the end of the
    /// function and then move monotonically to lower addresses.  The
    /// exception to this rule is when the current CP entry for a particular
    /// CPUser is out of range, but there is another CP entry for the same
    /// constant value in range.  We want to use the existing in-range CP
    /// entry, but if it later moves out of range, the search for new water
    /// should resume where it left off.  The HighWaterMark is used to record
    /// that point.
    struct CPUser {
      MachineInstr *MI;
      MachineInstr *CPEMI;
      MachineBasicBlock *HighWaterMark;
      unsigned MaxDisp;
      bool NegOk;
      bool IsSoImm;
      bool KnownAlignment;
      CPUser(MachineInstr *mi, MachineInstr *cpemi, unsigned maxdisp,
             bool neg, bool soimm)
        : MI(mi), CPEMI(cpemi), MaxDisp(maxdisp), NegOk(neg), IsSoImm(soimm),
          KnownAlignment(false) {
        HighWaterMark = CPEMI->getParent();
      }
      /// getMaxDisp - Returns the maximum displacement supported by MI.
      /// Correct for unknown alignment.
      /// Conservatively subtract 2 bytes to handle weird alignment effects.
      unsigned getMaxDisp() const {
        return (KnownAlignment ? MaxDisp : MaxDisp - 2) - 2;
      }
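      // Example (for illustration): a tLDRpci user has MaxDisp = 1020.
      // With KnownAlignment the usable range is 1020 - 2 = 1018 bytes;
      // without it, a further 2 bytes are subtracted, leaving 1016.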
    };

    /// CPUsers - Keep track of all of the machine instructions that use various
    /// constant pools and their max displacement.
    std::vector<CPUser> CPUsers;

    /// CPEntry - One per constant pool entry, keeping the machine instruction
    /// pointer, the constpool index, and the number of CPUsers which
    /// reference this entry.
    struct CPEntry {
      MachineInstr *CPEMI;
      unsigned CPI;
      unsigned RefCount;
      CPEntry(MachineInstr *cpemi, unsigned cpi, unsigned rc = 0)
        : CPEMI(cpemi), CPI(cpi), RefCount(rc) {}
    };

    /// CPEntries - Keep track of all of the constant pool entry machine
    /// instructions. For each original constpool index (i.e. those that existed
    /// upon entry to this pass), it keeps a vector of entries.  Original
    /// elements are cloned as we go along; the clones are put in the vector of
    /// the original element, but have distinct CPIs.
    ///
    /// The first half of CPEntries contains generic constants, the second half
    /// contains jump tables. Use getCombinedIndex on a generic CPEMI to look up
    /// which vector it will be in here.
    std::vector<std::vector<CPEntry> > CPEntries;

    /// Maps a JT index to the offset in CPEntries containing copies of that
    /// table. The equivalent map for a CONSTPOOL_ENTRY is the identity.
    DenseMap<int, int> JumpTableEntryIndices;

    /// Maps a JT index to the LEA that actually uses the index to calculate its
    /// base address.
    DenseMap<int, int> JumpTableUserIndices;

    /// ImmBranch - One per immediate branch, keeping the machine instruction
    /// pointer, conditional or unconditional, the max displacement,
    /// and (if isCond is true) the corresponding unconditional branch
    /// opcode.
    struct ImmBranch {
      MachineInstr *MI;
      unsigned MaxDisp : 31;
      bool isCond : 1;
      unsigned UncondBr;
      ImmBranch(MachineInstr *mi, unsigned maxdisp, bool cond, unsigned ubr)
        : MI(mi), MaxDisp(maxdisp), isCond(cond), UncondBr(ubr) {}
    };

    /// ImmBranches - Keep track of all the immediate branch instructions.
    ///
    std::vector<ImmBranch> ImmBranches;

    /// PushPopMIs - Keep track of all the Thumb push / pop instructions.
    ///
    SmallVector<MachineInstr*, 4> PushPopMIs;

    /// T2JumpTables - Keep track of all the Thumb2 jumptable instructions.
    SmallVector<MachineInstr*, 4> T2JumpTables;

    /// HasFarJump - True if any far jump instruction has been emitted during
    /// the branch fix up pass.
    bool HasFarJump;

    MachineFunction *MF;
    MachineConstantPool *MCP;
    const ARMBaseInstrInfo *TII;
    const ARMSubtarget *STI;
    ARMFunctionInfo *AFI;
    bool isThumb;
    bool isThumb1;
    bool isThumb2;
  public:
    static char ID;
    ARMConstantIslands() : MachineFunctionPass(ID) {}

    bool runOnMachineFunction(MachineFunction &MF) override;

    const char *getPassName() const override {
      return "ARM constant island placement and branch shortening pass";
    }

  private:
    void doInitialConstPlacement(std::vector<MachineInstr *> &CPEMIs);
    void doInitialJumpTablePlacement(std::vector<MachineInstr *> &CPEMIs);
    bool BBHasFallthrough(MachineBasicBlock *MBB);
    CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
    unsigned getCPELogAlign(const MachineInstr *CPEMI);
    void scanFunctionJumpTables();
    void initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs);
    MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
    void updateForInsertedWaterBlock(MachineBasicBlock *NewBB);
    void adjustBBOffsetsAfter(MachineBasicBlock *BB);
    bool decrementCPEReferenceCount(unsigned CPI, MachineInstr* CPEMI);
    unsigned getCombinedIndex(const MachineInstr *CPEMI);
    int findInRangeCPEntry(CPUser& U, unsigned UserOffset);
    bool findAvailableWater(CPUser&U, unsigned UserOffset,
                            water_iterator &WaterIter, bool CloserWater);
    void createNewWater(unsigned CPUserIndex, unsigned UserOffset,
                        MachineBasicBlock *&NewMBB);
    bool handleConstantPoolUser(unsigned CPUserIndex, bool CloserWater);
    void removeDeadCPEMI(MachineInstr *CPEMI);
    bool removeUnusedCPEntries();
    bool isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                          MachineInstr *CPEMI, unsigned Disp, bool NegOk,
                          bool DoDump = false);
    bool isWaterInRange(unsigned UserOffset, MachineBasicBlock *Water,
                        CPUser &U, unsigned &Growth);
    bool isBBInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
    bool fixupImmediateBr(ImmBranch &Br);
    bool fixupConditionalBr(ImmBranch &Br);
    bool fixupUnconditionalBr(ImmBranch &Br);
    bool undoLRSpillRestore();
    bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
    bool optimizeThumb2Instructions();
    bool optimizeThumb2Branches();
    bool reorderThumb2JumpTables();
    bool preserveBaseRegister(MachineInstr *JumpMI, MachineInstr *LEAMI,
                              unsigned &DeadSize, bool &CanDeleteLEA,
                              bool &BaseRegKill);
    bool optimizeThumb2JumpTables();
    MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
                                                  MachineBasicBlock *JTBB);

    void computeBlockSize(MachineBasicBlock *MBB);
    unsigned getOffsetOf(MachineInstr *MI) const;
    unsigned getUserOffset(CPUser&) const;
    void dumpBBs();
    void verify();

    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         unsigned Disp, bool NegativeOK, bool IsSoImm = false);
    bool isOffsetInRange(unsigned UserOffset, unsigned TrialOffset,
                         const CPUser &U) {
      return isOffsetInRange(UserOffset, TrialOffset,
                             U.getMaxDisp(), U.NegOk, U.IsSoImm);
    }
  };
  char ARMConstantIslands::ID = 0;
}

/// verify - check BBOffsets, BBSizes, alignment of islands
void ARMConstantIslands::verify() {
#ifndef NDEBUG
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock *MBB = &*MBBI;
    unsigned MBBId = MBB->getNumber();
    assert(!MBBId || BBInfo[MBBId - 1].postOffset() <= BBInfo[MBBId].Offset);
  }
  DEBUG(dbgs() << "Verifying " << CPUsers.size() << " CP users.\n");
  for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
    CPUser &U = CPUsers[i];
    unsigned UserOffset = getUserOffset(U);
    // Verify offset using the real max displacement without the safety
    // adjustment.
    if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, U.getMaxDisp()+2, U.NegOk,
                         /* DoDump = */ true)) {
      DEBUG(dbgs() << "OK\n");
      continue;
    }
    DEBUG(dbgs() << "Out of range.\n");
    dumpBBs();
    DEBUG(MF->dump());
    llvm_unreachable("Constant pool entry out of range!");
  }
#endif
}

/// print block size and offset information - debugging
void ARMConstantIslands::dumpBBs() {
  DEBUG({
    for (unsigned J = 0, E = BBInfo.size(); J != E; ++J) {
      const BasicBlockInfo &BBI = BBInfo[J];
      dbgs() << format("%08x BB#%u\t", BBI.Offset, J)
             << " kb=" << unsigned(BBI.KnownBits)
             << " ua=" << unsigned(BBI.Unalign)
             << " pa=" << unsigned(BBI.PostAlign)
             << format(" size=%#x\n", BBInfo[J].Size);
    }
  });
}

/// createARMConstantIslandPass - returns an instance of the constpool
/// island pass.
FunctionPass *llvm::createARMConstantIslandPass() {
  return new ARMConstantIslands();
}

bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
  MF = &mf;
  MCP = mf.getConstantPool();

  DEBUG(dbgs() << "***** ARMConstantIslands: "
               << MCP->getConstants().size() << " CP entries, aligned to "
               << MCP->getConstantPoolAlignment() << " bytes *****\n");

  STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
  TII = STI->getInstrInfo();
  AFI = MF->getInfo<ARMFunctionInfo>();

  isThumb = AFI->isThumbFunction();
  isThumb1 = AFI->isThumb1OnlyFunction();
  isThumb2 = AFI->isThumb2Function();

  HasFarJump = false;

  // This pass invalidates liveness information when it splits basic blocks.
  MF->getRegInfo().invalidateLiveness();

  // Renumber all of the machine basic blocks in the function, guaranteeing that
  // the numbers agree with the position of the block in the function.
  MF->RenumberBlocks();

  // Try to reorder and otherwise adjust the block layout to make good use
  // of the TB[BH] instructions.
  bool MadeChange = false;
  if (isThumb2 && AdjustJumpTableBlocks) {
    scanFunctionJumpTables();
    MadeChange |= reorderThumb2JumpTables();
    // Data is out of date, so clear it. It'll be re-computed later.
    T2JumpTables.clear();
    // Blocks may have shifted around. Keep the numbering up to date.
    MF->RenumberBlocks();
  }

  // Perform the initial placement of the constant pool entries.  To start with,
  // we put them all at the end of the function.
  std::vector<MachineInstr*> CPEMIs;
  if (!MCP->isEmpty())
    doInitialConstPlacement(CPEMIs);

  if (MF->getJumpTableInfo())
    doInitialJumpTablePlacement(CPEMIs);

  // The next UID to take is the first unused one.
  AFI->initPICLabelUId(CPEMIs.size());

  // Do the initial scan of the function, building up information about the
  // sizes of each block, the location of all the water, and finding all of the
  // constant pool users.
  initializeFunctionInfo(CPEMIs);
  CPEMIs.clear();
  DEBUG(dumpBBs());

  // Functions with jump tables need an alignment of 4 because they use the ADR
  // instruction, which aligns the PC to 4 bytes before adding an offset.
  if (!T2JumpTables.empty())
    MF->ensureAlignment(2);

  // Remove dead constant pool entries.
  MadeChange |= removeUnusedCPEntries();

  // Iteratively place constant pool entries and fix up branches until there
  // is no change.
  unsigned NoCPIters = 0, NoBRIters = 0;
  while (true) {
    DEBUG(dbgs() << "Beginning CP iteration #" << NoCPIters << '\n');
    bool CPChange = false;
    for (unsigned i = 0, e = CPUsers.size(); i != e; ++i)
      // For most inputs, it converges in no more than 5 iterations.
      // If it doesn't converge within 10 iterations, the input may have a
      // huge basic block or many CPEs. In that case, we try different
      // heuristics.
      CPChange |= handleConstantPoolUser(i, NoCPIters >= CPMaxIteration / 2);
    if (CPChange && ++NoCPIters > CPMaxIteration)
      report_fatal_error("Constant Island pass failed to converge!");
    DEBUG(dumpBBs());

    // Clear NewWaterList now.  If we split a block for branches, it should
    // appear as "new water" for the next iteration of constant pool placement.
    NewWaterList.clear();

    DEBUG(dbgs() << "Beginning BR iteration #" << NoBRIters << '\n');
    bool BRChange = false;
    for (unsigned i = 0, e = ImmBranches.size(); i != e; ++i)
      BRChange |= fixupImmediateBr(ImmBranches[i]);
    if (BRChange && ++NoBRIters > 30)
      report_fatal_error("Branch Fix Up pass failed to converge!");
    DEBUG(dumpBBs());

    if (!CPChange && !BRChange)
      break;
    MadeChange = true;
  }

  // Shrink 32-bit Thumb2 branch, load, and store instructions.
  if (isThumb2 && !STI->prefers32BitThumb())
    MadeChange |= optimizeThumb2Instructions();

  // After a while, this might be made debug-only, but it is not expensive.
  verify();

  // If LR has been force-spilled and no far jump (i.e. BL) has been issued,
  // undo the spill / restore of LR if possible.
  if (isThumb && !HasFarJump && AFI->isLRSpilledForFarJump())
    MadeChange |= undoLRSpillRestore();

  // Save the mapping between original and cloned constpool entries.
  for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
    for (unsigned j = 0, je = CPEntries[i].size(); j != je; ++j) {
      const CPEntry & CPE = CPEntries[i][j];
      if (CPE.CPEMI && CPE.CPEMI->getOperand(1).isCPI())
        AFI->recordCPEClone(i, CPE.CPI);
    }
  }

  DEBUG(dbgs() << '\n'; dumpBBs());

  BBInfo.clear();
  WaterList.clear();
  CPUsers.clear();
  CPEntries.clear();
  JumpTableEntryIndices.clear();
  JumpTableUserIndices.clear();
  ImmBranches.clear();
  PushPopMIs.clear();
  T2JumpTables.clear();

  return MadeChange;
}

/// \brief Perform the initial placement of the regular constant pool entries.
/// To start with, we put them all at the end of the function.
void
ARMConstantIslands::doInitialConstPlacement(std::vector<MachineInstr*> &CPEMIs) {
  // Create the basic block to hold the CPEs.
  MachineBasicBlock *BB = MF->CreateMachineBasicBlock();
  MF->push_back(BB);

  // MachineConstantPool measures alignment in bytes. We measure in log2(bytes).
  unsigned MaxAlign = Log2_32(MCP->getConstantPoolAlignment());

  // Mark the basic block as required by the const-pool.
  BB->setAlignment(MaxAlign);

  // The function needs to be as aligned as the basic blocks. The linker may
  // move functions around based on their alignment.
  MF->ensureAlignment(BB->getAlignment());

  // Order the entries in BB by descending alignment.  That ensures correct
  // alignment of all entries as long as BB is sufficiently aligned.  Keep
  // track of the insertion point for each alignment.  We are going to bucket
  // sort the entries as they are created.
  SmallVector<MachineBasicBlock::iterator, 8> InsPoint(MaxAlign + 1, BB->end());

  // Add all of the constants from the constant pool to the end block, using an
  // identity mapping of CPIs to CPEs.
  const std::vector<MachineConstantPoolEntry> &CPs = MCP->getConstants();

  const DataLayout &TD = MF->getDataLayout();
  for (unsigned i = 0, e = CPs.size(); i != e; ++i) {
    unsigned Size = TD.getTypeAllocSize(CPs[i].getType());
    assert(Size >= 4 && "Too small constant pool entry");
    unsigned Align = CPs[i].getAlignment();
    assert(isPowerOf2_32(Align) && "Invalid alignment");
    // Verify that all constant pool entries are a multiple of their alignment.
    // If not, we would have to pad them out so that instructions stay aligned.
    assert((Size % Align) == 0 && "CP Entry not multiple of its alignment!");

    // Insert CONSTPOOL_ENTRY before entries with a smaller alignment.
    unsigned LogAlign = Log2_32(Align);
    MachineBasicBlock::iterator InsAt = InsPoint[LogAlign];
    MachineInstr *CPEMI =
      BuildMI(*BB, InsAt, DebugLoc(), TII->get(ARM::CONSTPOOL_ENTRY))
        .addImm(i).addConstantPoolIndex(i).addImm(Size);
    CPEMIs.push_back(CPEMI);

    // Ensure that future entries with higher alignment get inserted before
    // CPEMI. This is bucket sort with iterators.
    for (unsigned a = LogAlign + 1; a <= MaxAlign; ++a)
      if (InsPoint[a] == InsAt)
        InsPoint[a] = CPEMI;

    // Add a new CPEntry, but no corresponding CPUser yet.
    CPEntries.emplace_back(1, CPEntry(CPEMI, i));
    ++NumCPEs;
    DEBUG(dbgs() << "Moved CPI#" << i << " to end of function, size = "
                 << Size << ", align = " << Align << '\n');
  }
  DEBUG(BB->dump());
}

/// \brief Do initial placement of the jump tables. Because Thumb2's TBB and TBH
/// instructions can be made more efficient if the jump table immediately
/// follows the instruction, it's best to place them immediately next to their
/// jumps to begin with. In almost all cases they'll never be moved from that
/// position.
void ARMConstantIslands::doInitialJumpTablePlacement(
    std::vector<MachineInstr *> &CPEMIs) {
  unsigned i = CPEntries.size();
  auto MJTI = MF->getJumpTableInfo();
  const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();

  MachineBasicBlock *LastCorrectlyNumberedBB = nullptr;
  for (MachineBasicBlock &MBB : *MF) {
    auto MI = MBB.getLastNonDebugInstr();
    if (MI == MBB.end())
      continue;

    unsigned JTOpcode;
    switch (MI->getOpcode()) {
    default:
      continue;
    case ARM::BR_JTadd:
    case ARM::BR_JTr:
    case ARM::tBR_JTr:
    case ARM::BR_JTm:
      JTOpcode = ARM::JUMPTABLE_ADDRS;
      break;
    case ARM::t2BR_JT:
      JTOpcode = ARM::JUMPTABLE_INSTS;
      break;
    case ARM::t2TBB_JT:
      JTOpcode = ARM::JUMPTABLE_TBB;
      break;
    case ARM::t2TBH_JT:
      JTOpcode = ARM::JUMPTABLE_TBH;
      break;
    }

    unsigned NumOps = MI->getDesc().getNumOperands();
    MachineOperand JTOp =
      MI->getOperand(NumOps - (MI->isPredicable() ? 2 : 1));
    unsigned JTI = JTOp.getIndex();
    unsigned Size = JT[JTI].MBBs.size() * sizeof(uint32_t);
    MachineBasicBlock *JumpTableBB = MF->CreateMachineBasicBlock();
    MF->insert(std::next(MachineFunction::iterator(MBB)), JumpTableBB);
    MachineInstr *CPEMI = BuildMI(*JumpTableBB, JumpTableBB->begin(),
                                  DebugLoc(), TII->get(JTOpcode))
                              .addImm(i++)
                              .addJumpTableIndex(JTI)
                              .addImm(Size);
    CPEMIs.push_back(CPEMI);
    CPEntries.emplace_back(1, CPEntry(CPEMI, JTI));
    JumpTableEntryIndices.insert(std::make_pair(JTI, CPEntries.size() - 1));
    if (!LastCorrectlyNumberedBB)
      LastCorrectlyNumberedBB = &MBB;
  }

  // If we did anything then we need to renumber the subsequent blocks.
  if (LastCorrectlyNumberedBB)
    MF->RenumberBlocks(LastCorrectlyNumberedBB);
}

/// BBHasFallthrough - Return true if the specified basic block can fallthrough
/// into the block immediately after it.
bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
  // Get the next machine basic block in the function.
  MachineFunction::iterator MBBI = MBB->getIterator();
  // Can't fall off end of function.
  if (std::next(MBBI) == MBB->getParent()->end())
    return false;

  MachineBasicBlock *NextBB = &*std::next(MBBI);
  if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
    return false;

  // Try to analyze the end of the block. A potential fallthrough may already
  // have an unconditional branch for whatever reason.
  MachineBasicBlock *TBB, *FBB;
  SmallVector<MachineOperand, 4> Cond;
  bool TooDifficult = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
  return TooDifficult || FBB == nullptr;
}

/// findConstPoolEntry - Given the constpool index and CONSTPOOL_ENTRY MI,
/// look up the corresponding CPEntry.
ARMConstantIslands::CPEntry
*ARMConstantIslands::findConstPoolEntry(unsigned CPI,
                                        const MachineInstr *CPEMI) {
  std::vector<CPEntry> &CPEs = CPEntries[CPI];
  // Number of entries per constpool index should be small, just do a
  // linear search.
  for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
    if (CPEs[i].CPEMI == CPEMI)
      return &CPEs[i];
  }
  return nullptr;
}

/// getCPELogAlign - Returns the required alignment of the constant pool entry
/// represented by CPEMI.  Alignment is measured in log2(bytes) units.
unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
  switch (CPEMI->getOpcode()) {
  case ARM::CONSTPOOL_ENTRY:
    break;
  case ARM::JUMPTABLE_TBB:
    return 0;
  case ARM::JUMPTABLE_TBH:
  case ARM::JUMPTABLE_INSTS:
    return 1;
  case ARM::JUMPTABLE_ADDRS:
    return 2;
  default:
    llvm_unreachable("unknown constpool entry kind");
  }

  unsigned CPI = getCombinedIndex(CPEMI);
  assert(CPI < MCP->getConstants().size() && "Invalid constant pool index.");
  unsigned Align = MCP->getConstants()[CPI].getAlignment();
  assert(isPowerOf2_32(Align) && "Invalid CPE alignment");
  return Log2_32(Align);
}

/// scanFunctionJumpTables - Do a scan of the function, building up
/// information about the sizes of each block and the locations of all
/// the jump tables.
void ARMConstantIslands::scanFunctionJumpTables() {
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock &MBB = *MBBI;

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I)
      if (I->isBranch() && I->getOpcode() == ARM::t2BR_JT)
        T2JumpTables.push_back(I);
  }
}

/// initializeFunctionInfo - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
void ARMConstantIslands::
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
  BBInfo.clear();
  BBInfo.resize(MF->getNumBlockIDs());

  // First thing, compute the size of all basic blocks, and see if the function
  // has any inline assembly in it. If so, we have to be conservative about
  // alignment assumptions, as we don't know for sure the size of any
  // instructions in the inline assembly.
  for (MachineBasicBlock &MBB : *MF)
    computeBlockSize(&MBB);

  // The known bits of the entry block offset are determined by the function
  // alignment.
  BBInfo.front().KnownBits = MF->getAlignment();

  // Compute block offsets and known bits.
  adjustBBOffsetsAfter(&MF->front());

  // Now go back through the instructions and build up our data structures.
  for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end();
       MBBI != E; ++MBBI) {
    MachineBasicBlock &MBB = *MBBI;

    // If this block doesn't fall through into the next MBB, then this is
    // 'water' where a constant pool island could be placed.
    if (!BBHasFallthrough(&MBB))
      WaterList.push_back(&MBB);

    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      if (I->isDebugValue())
        continue;

      unsigned Opc = I->getOpcode();
      if (I->isBranch()) {
        bool isCond = false;
        unsigned Bits = 0;
        unsigned Scale = 1;
        int UOpc = Opc;
        switch (Opc) {
        default:
          continue;  // Ignore other JT branches
        case ARM::t2BR_JT:
          T2JumpTables.push_back(I);
          continue;   // Does not get an entry in ImmBranches
        case ARM::Bcc:
          isCond = true;
          UOpc = ARM::B;
          // Fallthrough
        case ARM::B:
          Bits = 24;
          Scale = 4;
          break;
        case ARM::tBcc:
          isCond = true;
          UOpc = ARM::tB;
          Bits = 8;
          Scale = 2;
          break;
        case ARM::tB:
          Bits = 11;
          Scale = 2;
          break;
        case ARM::t2Bcc:
          isCond = true;
          UOpc = ARM::t2B;
          Bits = 20;
          Scale = 2;
          break;
        case ARM::t2B:
          Bits = 24;
          Scale = 2;
          break;
        }

        // Record this immediate branch.
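        // For example (illustrative): ARM::B has Bits = 24, Scale = 4, so
        // MaxOffs = ((1 << 23) - 1) * 4, roughly +/-32 MiB of reach, while
        // ARM::tB (Bits = 11, Scale = 2) only reaches ((1 << 10) - 1) * 2
        // = 2046 bytes.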
        unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
        ImmBranches.push_back(ImmBranch(I, MaxOffs, isCond, UOpc));
      }

      if (Opc == ARM::tPUSH || Opc == ARM::tPOP_RET)
        PushPopMIs.push_back(I);

      if (Opc == ARM::CONSTPOOL_ENTRY || Opc == ARM::JUMPTABLE_ADDRS ||
          Opc == ARM::JUMPTABLE_INSTS || Opc == ARM::JUMPTABLE_TBB ||
          Opc == ARM::JUMPTABLE_TBH)
        continue;

      // Scan the instructions for constant pool operands.
      for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op)
        if (I->getOperand(op).isCPI() || I->getOperand(op).isJTI()) {
          // We found one.  The addressing mode tells us the max displacement
          // from the PC that this instruction permits.

          // Basic size info comes from the TSFlags field.
          unsigned Bits = 0;
          unsigned Scale = 1;
          bool NegOk = false;
          bool IsSoImm = false;

          switch (Opc) {
          default:
            llvm_unreachable("Unknown addressing mode for CP reference!");

          // Taking the address of a CP entry.
          case ARM::LEApcrel:
          case ARM::LEApcrelJT:
            // This takes a SoImm, which is an 8-bit immediate rotated by an
            // even amount. We'll pretend the maximum offset is 255 * 4.
            // Since each instruction is 4 bytes wide, this is always correct.
            // We'll check for other displacements that fit in a SoImm as well.
            Bits = 8;
            Scale = 4;
            NegOk = true;
            IsSoImm = true;
            break;
          case ARM::t2LEApcrel:
          case ARM::t2LEApcrelJT:
            Bits = 12;
            NegOk = true;
            break;
          case ARM::tLEApcrel:
          case ARM::tLEApcrelJT:
            Bits = 8;
            Scale = 4;
            break;

          case ARM::LDRBi12:
          case ARM::LDRi12:
          case ARM::LDRcp:
          case ARM::t2LDRpci:
            Bits = 12;  // +-offset_12
            NegOk = true;
            break;

          case ARM::tLDRpci:
            Bits = 8;
            Scale = 4;  // +(offset_8*4)
            break;

          case ARM::VLDRD:
          case ARM::VLDRS:
            Bits = 8;
            Scale = 4;  // +-(offset_8*4)
            NegOk = true;
            break;
          }

          // Remember that this is a user of a CP entry.
          unsigned CPI = I->getOperand(op).getIndex();
          if (I->getOperand(op).isJTI()) {
            JumpTableUserIndices.insert(std::make_pair(CPI, CPUsers.size()));
            CPI = JumpTableEntryIndices[CPI];
          }

          MachineInstr *CPEMI = CPEMIs[CPI];
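          // For example (illustrative): tLDRpci has Bits = 8, Scale = 4,
          // giving MaxOffs = ((1 << 8) - 1) * 4 = 1020 bytes of reach.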
          unsigned MaxOffs = ((1 << Bits)-1) * Scale;
          CPUsers.push_back(CPUser(I, CPEMI, MaxOffs, NegOk, IsSoImm));

          // Increment corresponding CPEntry reference count.
          CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
          assert(CPE && "Cannot find a corresponding CPEntry!");
          CPE->RefCount++;

          // Instructions can only use one CP entry, don't bother scanning the
          // rest of the operands.
          break;
        }
    }
  }
}

/// computeBlockSize - Compute the size and some alignment information for MBB.
/// This function updates BBInfo directly.
void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
  BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
  BBI.Size = 0;
  BBI.Unalign = 0;
  BBI.PostAlign = 0;

  for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
       ++I) {
    BBI.Size += TII->GetInstSizeInBytes(I);
    // For inline asm, GetInstSizeInBytes returns a conservative estimate.
    // The actual size may be smaller, but still a multiple of the instr size.
    if (I->isInlineAsm())
      BBI.Unalign = isThumb ? 1 : 2;
    // Also consider instructions that may be shrunk later.
    else if (isThumb && mayOptimizeThumb2Instruction(I))
      BBI.Unalign = 1;
  }

  // tBR_JTr contains a .align 2 directive.
  if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
    BBI.PostAlign = 2;
    MBB->getParent()->ensureAlignment(2);
  }
}

/// getOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function.  This offset changes as stuff is moved
/// around inside the function.
unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
  MachineBasicBlock *MBB = MI->getParent();

  // The offset is composed of two things: the sum of the sizes of all MBB's
  // before this instruction's block, and the offset from the start of the block
  // it is in.
  unsigned Offset = BBInfo[MBB->getNumber()].Offset;

  // Sum instructions before MI in MBB.
  for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
    assert(I != MBB->end() && "Didn't find MI in its own basic block?");
    Offset += TII->GetInstSizeInBytes(I);
  }
  return Offset;
}

/// CompareMBBNumbers - Little predicate function to sort the WaterList by MBB
/// ID.
static bool CompareMBBNumbers(const MachineBasicBlock *LHS,
                              const MachineBasicBlock *RHS) {
  return LHS->getNumber() < RHS->getNumber();
}

/// updateForInsertedWaterBlock - When a block is newly inserted into the
/// machine function, it upsets all of the block numbers.  Renumber the blocks
/// and update the arrays that parallel this numbering.
void ARMConstantIslands::updateForInsertedWaterBlock(MachineBasicBlock *NewBB) {
  // Renumber the MBB's to keep them consecutive.
  NewBB->getParent()->RenumberBlocks(NewBB);

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList.  Specifically, we need to add NewBB as having
  // available water after it.
  water_iterator IP =
    std::lower_bound(WaterList.begin(), WaterList.end(), NewBB,
                     CompareMBBNumbers);
  WaterList.insert(IP, NewBB);
}


/// Split the basic block containing MI into two blocks, which are joined by
/// an unconditional branch.  Update data structures and renumber blocks to
/// account for this change and return the newly created block.
MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
  MachineBasicBlock *OrigBB = MI->getParent();

  // Create a new MBB for the code after the OrigBB.
  MachineBasicBlock *NewBB =
    MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
  MachineFunction::iterator MBBI = ++OrigBB->getIterator();
  MF->insert(MBBI, NewBB);

  // Splice the instructions starting with MI over to NewBB.
  NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());

  // Add an unconditional branch from OrigBB to NewBB.
  // Note the new unconditional branch is not being recorded.
  // There doesn't seem to be meaningful DebugInfo available; this doesn't
  // correspond to anything in the source.
  unsigned Opc = isThumb ? (isThumb2 ? ARM::t2B : ARM::tB) : ARM::B;
  if (!isThumb)
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB);
  else
    BuildMI(OrigBB, DebugLoc(), TII->get(Opc)).addMBB(NewBB)
            .addImm(ARMCC::AL).addReg(0);
  ++NumSplit;

  // Update the CFG.  All succs of OrigBB are now succs of NewBB.
  NewBB->transferSuccessors(OrigBB);

  // OrigBB branches to NewBB.
  OrigBB->addSuccessor(NewBB);

  // Update internal data structures to account for the newly inserted MBB.
  // This is almost the same as updateForInsertedWaterBlock, except that
  // the Water goes after OrigBB, not NewBB.
  MF->RenumberBlocks(NewBB);

  // Insert an entry into BBInfo to align it properly with the (newly
  // renumbered) block numbers.
  BBInfo.insert(BBInfo.begin() + NewBB->getNumber(), BasicBlockInfo());

  // Next, update WaterList.  Specifically, we need to add OrigBB as having
  // available water after it (but not if it's already there, which happens
  // when splitting before a conditional branch that is followed by an
  // unconditional branch - in that case we want to insert NewBB).
  water_iterator IP =
    std::lower_bound(WaterList.begin(), WaterList.end(), OrigBB,
                     CompareMBBNumbers);
  MachineBasicBlock* WaterBB = *IP;
  if (WaterBB == OrigBB)
    WaterList.insert(std::next(IP), NewBB);
  else
    WaterList.insert(IP, OrigBB);
  NewWaterList.insert(OrigBB);

  // Figure out how large the OrigBB is.  As the first half of the original
  // block, it cannot contain a tablejump.  The size includes
  // the new jump we added.  (It should be possible to do this without
  // recounting everything, but it's very confusing, and this is rarely
  // executed.)
  computeBlockSize(OrigBB);

  // Figure out how large the NewBB is.  As the second half of the original
  // block, it may contain a tablejump.
  computeBlockSize(NewBB);

  // All BBOffsets following these blocks must be modified.
  adjustBBOffsetsAfter(OrigBB);

  return NewBB;
}

/// getUserOffset - Compute the offset of U.MI as seen by the hardware
/// displacement computation.  Update U.KnownAlignment to match its current
/// basic block location.
unsigned ARMConstantIslands::getUserOffset(CPUser &U) const {
  unsigned UserOffset = getOffsetOf(U.MI);
  const BasicBlockInfo &BBI = BBInfo[U.MI->getParent()->getNumber()];
  unsigned KnownBits = BBI.internalKnownBits();

  // The value read from PC is offset from the actual instruction address.
  UserOffset += (isThumb ? 4 : 8);

  // Because of inline assembly, we may not know the alignment (mod 4) of U.MI.
  // Make sure U.getMaxDisp() returns a constrained range.
  U.KnownAlignment = (KnownBits >= 2);

  // On Thumb, offsets==2 mod 4 are rounded down by the hardware for
  // purposes of the displacement computation; compensate for that here.
  // For unknown alignments, getMaxDisp() constrains the range instead.
  if (isThumb && U.KnownAlignment)
    UserOffset &= ~3u;

  return UserOffset;
}
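// Example (for illustration): a Thumb user at raw offset 0x10 reads the PC
// as 0x14 (it would be 0x18 in ARM mode).  If the low two offset bits are
// known, a Thumb offset of 0x16 is rounded down to 0x14 before use, matching
// the hardware's rounding of offsets that are 2 mod 4.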

/// isOffsetInRange - Checks whether UserOffset (the location of a constant pool
/// reference) is within MaxDisp of TrialOffset (a proposed location of a
/// constant pool entry).
/// UserOffset is computed by getUserOffset above to include PC adjustments. If
/// the mod 4 alignment of UserOffset is not known, the uncertainty must be
/// subtracted from MaxDisp instead. CPUser::getMaxDisp() does that.
bool ARMConstantIslands::isOffsetInRange(unsigned UserOffset,
                                         unsigned TrialOffset, unsigned MaxDisp,
                                         bool NegativeOK, bool IsSoImm) {
  if (UserOffset <= TrialOffset) {
    // User before the Trial.
    if (TrialOffset - UserOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  } else if (NegativeOK) {
    if (UserOffset - TrialOffset <= MaxDisp)
      return true;
    // FIXME: Make use of the full range of soimm values.
  }
  return false;
}
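// Example (for illustration): with UserOffset = 0x100, MaxDisp = 1016 and
// NegativeOK = true, any TrialOffset in [0x100 - 1016, 0x100 + 1016] is
// accepted; with NegativeOK = false, only forward offsets qualify.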

/// isWaterInRange - Returns true if a CPE placed after the specified
/// Water (a basic block) will be in range for the specific MI.
///
/// Compute how much the function will grow by inserting a CPE after Water.
bool ARMConstantIslands::isWaterInRange(unsigned UserOffset,
                                        MachineBasicBlock* Water, CPUser &U,
                                        unsigned &Growth) {
  unsigned CPELogAlign = getCPELogAlign(U.CPEMI);
  unsigned CPEOffset = BBInfo[Water->getNumber()].postOffset(CPELogAlign);
  unsigned NextBlockOffset, NextBlockAlignment;
  MachineFunction::const_iterator NextBlock = Water->getIterator();
  if (++NextBlock == MF->end()) {
    NextBlockOffset = BBInfo[Water->getNumber()].postOffset();
    NextBlockAlignment = 0;
  } else {
    NextBlockOffset = BBInfo[NextBlock->getNumber()].Offset;
    NextBlockAlignment = NextBlock->getAlignment();
  }
  unsigned Size = U.CPEMI->getOperand(2).getImm();
  unsigned CPEEnd = CPEOffset + Size;

  // The CPE may be able to hide in the alignment padding before the next
  // block. It may also cause more padding to be required if it is more aligned
  // than the next block.
  if (CPEEnd > NextBlockOffset) {
    Growth = CPEEnd - NextBlockOffset;
    // Compute the padding that would go at the end of the CPE to align the next
    // block.
    Growth += OffsetToAlignment(CPEEnd, 1u << NextBlockAlignment);

    // If the CPE is to be inserted before the instruction, that will raise
    // the offset of the instruction. Also account for unknown alignment padding
    // in blocks between CPE and the user.
    if (CPEOffset < UserOffset)
      UserOffset += Growth + UnknownPadding(MF->getAlignment(), CPELogAlign);
  } else
    // CPE fits in existing padding.
    Growth = 0;

  return isOffsetInRange(UserOffset, CPEOffset, U);
}

/// isCPEntryInRange - Returns true if the distance between specific MI and
/// specific ConstPool entry instruction can fit in MI's displacement field.
bool ARMConstantIslands::isCPEntryInRange(MachineInstr *MI, unsigned UserOffset,
                                      MachineInstr *CPEMI, unsigned MaxDisp,
                                      bool NegOk, bool DoDump) {
  unsigned CPEOffset  = getOffsetOf(CPEMI);

  if (DoDump) {
    DEBUG({
      unsigned Block = MI->getParent()->getNumber();
      const BasicBlockInfo &BBI = BBInfo[Block];
      dbgs() << "User of CPE#" << CPEMI->getOperand(0).getImm()
             << " max delta=" << MaxDisp
             << format(" insn address=%#x", UserOffset)
             << " in BB#" << Block << ": "
             << format("%#x-%x\t", BBI.Offset, BBI.postOffset()) << *MI
             << format("CPE address=%#x offset=%+d: ", CPEOffset,
                       int(CPEOffset-UserOffset));
    });
  }

  return isOffsetInRange(UserOffset, CPEOffset, MaxDisp, NegOk);
}

#ifndef NDEBUG
/// BBIsJumpedOver - Return true if the specified basic block's only predecessor
/// unconditionally branches to its only successor.
static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
    return false;

  MachineBasicBlock *Succ = *MBB->succ_begin();
  MachineBasicBlock *Pred = *MBB->pred_begin();
  MachineInstr *PredMI = &Pred->back();
  if (PredMI->getOpcode() == ARM::B || PredMI->getOpcode() == ARM::tB
      || PredMI->getOpcode() == ARM::t2B)
    return PredMI->getOperand(0).getMBB() == Succ;
  return false;
}
#endif // NDEBUG

void ARMConstantIslands::adjustBBOffsetsAfter(MachineBasicBlock *BB) {
  unsigned BBNum = BB->getNumber();
  for (unsigned i = BBNum + 1, e = MF->getNumBlockIDs(); i < e; ++i) {
    // Get the offset and known bits at the end of the layout predecessor.
    // Include the alignment of the current block.
    unsigned LogAlign = MF->getBlockNumbered(i)->getAlignment();
    unsigned Offset = BBInfo[i - 1].postOffset(LogAlign);
    unsigned KnownBits = BBInfo[i - 1].postKnownBits(LogAlign);

    // This is where block i begins.  Stop if the offset is already correct,
    // and we have updated 2 blocks.  This is the maximum number of blocks
    // changed before calling this function.
    if (i > BBNum + 2 &&
        BBInfo[i].Offset == Offset &&
        BBInfo[i].KnownBits == KnownBits)
      break;

    BBInfo[i].Offset = Offset;
    BBInfo[i].KnownBits = KnownBits;
  }
}
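// Example (for illustration): if block N grows by 4 bytes, each later block's
// Offset is recomputed from its layout predecessor's postOffset() until a
// block is reached whose Offset and KnownBits are already correct (after at
// least two blocks have been updated), at which point the remaining blocks
// need no changes.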
   1195 
   1196 /// decrementCPEReferenceCount - find the constant pool entry with index CPI
   1197 /// and instruction CPEMI, and decrement its refcount.  If the refcount
   1198 /// becomes 0 remove the entry and instruction.  Returns true if we removed
   1199 /// the entry, false if we didn't.
   1200 
   1201 bool ARMConstantIslands::decrementCPEReferenceCount(unsigned CPI,
   1202                                                     MachineInstr *CPEMI) {
   1203   // Find the old entry. Eliminate it if it is no longer used.
   1204   CPEntry *CPE = findConstPoolEntry(CPI, CPEMI);
   1205   assert(CPE && "Unexpected!");
   1206   if (--CPE->RefCount == 0) {
   1207     removeDeadCPEMI(CPEMI);
   1208     CPE->CPEMI = nullptr;
   1209     --NumCPEs;
   1210     return true;
   1211   }
   1212   return false;
   1213 }
   1214 
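        /// getCombinedIndex - Return the index into CPEntries for CPEMI: its
        /// constant-pool index directly, or, for a jump-table entry, the
        /// combined index recorded in JumpTableEntryIndices.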
   1215 unsigned ARMConstantIslands::getCombinedIndex(const MachineInstr *CPEMI) {
   1216   if (CPEMI->getOperand(1).isCPI())
   1217     return CPEMI->getOperand(1).getIndex();
   1218 
   1219   return JumpTableEntryIndices[CPEMI->getOperand(1).getIndex()];
   1220 }
   1221 
   1222 /// LookForCPEntryInRange - see if the currently referenced CPE is in range;
   1223 /// if not, see if an in-range clone of the CPE is in range, and if so,
   1224 /// change the data structures so the user references the clone.  Returns:
   1225 /// 0 = no existing entry found
   1226 /// 1 = entry found, and there were no code insertions or deletions
   1227 /// 2 = entry found, and there were code insertions or deletions
   1228 int ARMConstantIslands::findInRangeCPEntry(CPUser &U, unsigned UserOffset) {
   1230   MachineInstr *UserMI = U.MI;
   1231   MachineInstr *CPEMI  = U.CPEMI;
   1232 
   1233   // Check to see if the CPE is already in-range.
   1234   if (isCPEntryInRange(UserMI, UserOffset, CPEMI, U.getMaxDisp(), U.NegOk,
   1235                        true)) {
   1236     DEBUG(dbgs() << "In range\n");
   1237     return 1;
   1238   }
   1239 
   1240   // No.  Look for previously created clones of the CPE that are in range.
   1241   unsigned CPI = getCombinedIndex(CPEMI);
   1242   std::vector<CPEntry> &CPEs = CPEntries[CPI];
   1243   for (unsigned i = 0, e = CPEs.size(); i != e; ++i) {
   1244     // We already tried this one.
   1245     if (CPEs[i].CPEMI == CPEMI)
   1246       continue;
   1247     // Removing CPEs can leave empty entries; skip those.
   1248     if (CPEs[i].CPEMI == nullptr)
   1249       continue;
   1250     if (isCPEntryInRange(UserMI, UserOffset, CPEs[i].CPEMI, U.getMaxDisp(),
   1251                          U.NegOk)) {
   1252       DEBUG(dbgs() << "Replacing CPE#" << CPI << " with CPE#"
   1253                    << CPEs[i].CPI << "\n");
   1254       // Point the CPUser node to the replacement
   1255       U.CPEMI = CPEs[i].CPEMI;
   1256       // Change the CPI in the instruction operand to refer to the clone.
   1257       for (unsigned j = 0, e = UserMI->getNumOperands(); j != e; ++j)
   1258         if (UserMI->getOperand(j).isCPI()) {
   1259           UserMI->getOperand(j).setIndex(CPEs[i].CPI);
   1260           break;
   1261         }
   1262       // Adjust the refcount of the clone...
   1263       CPEs[i].RefCount++;
   1264       // ...and the original.  If we didn't remove the old entry, none of the
   1265       // addresses changed, so we don't need another pass.
   1266       return decrementCPEReferenceCount(CPI, CPEMI) ? 2 : 1;
   1267     }
   1268   }
   1269   return 0;
   1270 }
   1271 
   1272 /// getUnconditionalBrDisp - Returns the maximum displacement that can fit in
   1273 /// the specified unconditional branch instruction.
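        /// For reference: tB encodes an 11-bit signed halfword offset, hence
        /// ((1<<10)-1)*2 bytes forward; t2B encodes a 24-bit signed halfword
        /// offset; an ARM B encodes a 24-bit signed word offset, hence
        /// ((1<<23)-1)*4 bytes.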
   1274 static inline unsigned getUnconditionalBrDisp(int Opc) {
   1275   switch (Opc) {
   1276   case ARM::tB:
   1277     return ((1<<10)-1)*2;
   1278   case ARM::t2B:
   1279     return ((1<<23)-1)*2;
   1280   default:
   1281     break;
   1282   }
   1283 
   1284   return ((1<<23)-1)*4;
   1285 }
   1286 
   1287 /// findAvailableWater - Look for an existing entry in the WaterList in which
   1288 /// we can place the CPE referenced from U so it's within range of U's MI.
   1289 /// Returns true if found, false if not.  If it returns true, WaterIter
   1290 /// is set to the WaterList entry.  For Thumb, prefer water that will not
   1291 /// introduce padding to water that will.  To ensure that this pass
   1292 /// terminates, the CPE location for a particular CPUser is only allowed to
   1293 /// move to a lower address, so search backward from the end of the list and
   1294 /// prefer the first water that is in range.
   1295 bool ARMConstantIslands::findAvailableWater(CPUser &U, unsigned UserOffset,
   1296                                             water_iterator &WaterIter,
   1297                                             bool CloserWater) {
   1298   if (WaterList.empty())
   1299     return false;
   1300 
   1301   unsigned BestGrowth = ~0u;
   1302   // The nearest water without splitting the UserBB is right after it.
   1303   // If the distance is still large (we have a big BB), then we need to split
   1304   // it if we don't converge after a certain number of iterations. This helps
   1305   // the following situation converge:
   1306   //   BB0:
   1307   //      Big BB
   1308   //   BB1:
   1309   //      Constant Pool
   1310   // When a CP access is out of range, BB0 may be used as water. However,
   1311   // inserting islands between BB0 and BB1 makes other accesses out of range.
   1312   MachineBasicBlock *UserBB = U.MI->getParent();
   1313   unsigned MinNoSplitDisp =
   1314       BBInfo[UserBB->getNumber()].postOffset(getCPELogAlign(U.CPEMI));
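          // When looking for closer water, if even this nearest water (right
          // after UserBB) would consume more than half the displacement
          // budget, report failure so the caller creates new water by
          // splitting the block instead.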
   1315   if (CloserWater && MinNoSplitDisp > U.getMaxDisp() / 2)
   1316     return false;
   1317   for (water_iterator IP = std::prev(WaterList.end()), B = WaterList.begin();;
   1318        --IP) {
   1319     MachineBasicBlock* WaterBB = *IP;
   1320     // Check if water is in range and is either at a lower address than the
   1321     // current "high water mark" or a new water block that was created since
   1322     // the previous iteration by inserting an unconditional branch.  In the
   1323     // latter case, we want to allow resetting the high water mark back to
   1324     // this new water since we haven't seen it before.  Inserting branches
   1325     // should be relatively uncommon and when it does happen, we want to be
   1326     // sure to take advantage of it for all the CPEs near that block, so that
   1327     // we don't insert more branches than necessary.
   1328     // When CloserWater is true, we try to find the lowest address after (or
   1329     // equal to) user MI's BB, regardless of padding growth.
   1330     unsigned Growth;
   1331     if (isWaterInRange(UserOffset, WaterBB, U, Growth) &&
   1332         (WaterBB->getNumber() < U.HighWaterMark->getNumber() ||
   1333          NewWaterList.count(WaterBB) || WaterBB == U.MI->getParent()) &&
   1334         Growth < BestGrowth) {
   1335       // This is the least amount of required padding seen so far.
   1336       BestGrowth = Growth;
   1337       WaterIter = IP;
   1338       DEBUG(dbgs() << "Found water after BB#" << WaterBB->getNumber()
   1339                    << " Growth=" << Growth << '\n');
   1340 
   1341       if (CloserWater && WaterBB == U.MI->getParent())
   1342         return true;
   1343       // Keep looking unless it is perfect and we're not looking for the lowest
   1344       // possible address.
   1345       if (!CloserWater && BestGrowth == 0)
   1346         return true;
   1347     }
   1348     if (IP == B)
   1349       break;
   1350   }
   1351   return BestGrowth != ~0u;
   1352 }
   1353 
   1354 /// createNewWater - No existing WaterList entry will work for
   1355 /// CPUsers[CPUserIndex], so create a place to put the CPE.  The end of the
   1356 /// block is used if in range, and the conditional branch munged so control
   1357 /// flow is correct.  Otherwise the block is split to create a hole with an
   1358 /// unconditional branch around it.  In either case NewMBB is set to a
   1359 /// block following which the new island can be inserted (the WaterList
   1360 /// is not adjusted).
   1361 void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
   1362                                         unsigned UserOffset,
   1363                                         MachineBasicBlock *&NewMBB) {
   1364   CPUser &U = CPUsers[CPUserIndex];
   1365   MachineInstr *UserMI = U.MI;
   1366   MachineInstr *CPEMI  = U.CPEMI;
   1367   unsigned CPELogAlign = getCPELogAlign(CPEMI);
   1368   MachineBasicBlock *UserMBB = UserMI->getParent();
   1369   const BasicBlockInfo &UserBBI = BBInfo[UserMBB->getNumber()];
   1370 
   1371   // If the block does not end in an unconditional branch already, and if the
   1372   // end of the block is within range, make new water there.  (The addition
   1373   // below is for the unconditional branch we will be adding: 4 bytes on ARM +
   1374   // Thumb2, 2 on Thumb1.)
   1375   if (BBHasFallthrough(UserMBB)) {
   1376     // Size of branch to insert.
   1377     unsigned Delta = isThumb1 ? 2 : 4;
   1378     // Compute the offset where the CPE will begin.
   1379     unsigned CPEOffset = UserBBI.postOffset(CPELogAlign) + Delta;
   1380 
   1381     if (isOffsetInRange(UserOffset, CPEOffset, U)) {
   1382       DEBUG(dbgs() << "Split at end of BB#" << UserMBB->getNumber()
   1383             << format(", expected CPE offset %#x\n", CPEOffset));
   1384       NewMBB = &*++UserMBB->getIterator();
   1385       // Add an unconditional branch from UserMBB to fallthrough block.  Record
   1386       // it for branch lengthening; this new branch will not get out of range,
   1387       // but if the preceding conditional branch is out of range, the targets
   1388       // will be exchanged, and the altered branch may be out of range, so the
   1389       // machinery has to know about it.
   1390       int UncondBr = isThumb ? ((isThumb2) ? ARM::t2B : ARM::tB) : ARM::B;
   1391       if (!isThumb)
   1392         BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB);
   1393       else
   1394         BuildMI(UserMBB, DebugLoc(), TII->get(UncondBr)).addMBB(NewMBB)
   1395           .addImm(ARMCC::AL).addReg(0);
   1396       unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
   1397       ImmBranches.push_back(ImmBranch(&UserMBB->back(),
   1398                                       MaxDisp, false, UncondBr));
   1399       computeBlockSize(UserMBB);
   1400       adjustBBOffsetsAfter(UserMBB);
   1401       return;
   1402     }
   1403   }
   1404 
   1405   // What a big block.  Find a place within the block to split it.  This is a
   1406   // little tricky on Thumb1 since instructions are 2 bytes and constant pool
   1407   // entries are 4 bytes: if instruction I references island CPE, and
   1408   // instruction I+1 references CPE', it will not work well to put CPE as far
   1409   // forward as possible, since then CPE' cannot immediately follow it (that
   1410   // location is 2 bytes farther away from I+1 than CPE was from I) and we'd
   1411   // need to create a new island.  So, we make a first guess, then walk through
   1412   // the instructions between the one currently being looked at and the
   1413   // possible insertion point, and make sure any other instructions that
   1414   // reference CPEs will be able to use the same island area; if not, we back
   1415   // up the insertion point.
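          //
          // For example (hypothetical numbers): if UserOffset is 0x100, the
          // user can reach 0x400 bytes forward, and there are 2 bytes of
          // worst-case unknown padding, the first guess below is
          // 0x100 + 0x400 - 2, then minus 4 for the branch we insert; the
          // loop that follows walks forward and backs this guess up if an
          // intervening CP user could not also reach the island.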
   1416 
   1417   // Try to split the block so it's fully aligned.  Compute the latest split
   1418   // point where we can add a 4-byte branch instruction, and then align to
   1419   // LogAlign which is the largest possible alignment in the function.
   1420   unsigned LogAlign = MF->getAlignment();
   1421   assert(LogAlign >= CPELogAlign && "Over-aligned constant pool entry");
   1422   unsigned KnownBits = UserBBI.internalKnownBits();
   1423   unsigned UPad = UnknownPadding(LogAlign, KnownBits);
   1424   unsigned BaseInsertOffset = UserOffset + U.getMaxDisp() - UPad;
   1425   DEBUG(dbgs() << format("Split in middle of big block before %#x",
   1426                          BaseInsertOffset));
   1427 
   1428   // The 4 in the following is for the unconditional branch we'll be inserting
   1429   // (allows for long branch on Thumb1).  Alignment of the island is handled
   1430   // inside isOffsetInRange.
   1431   BaseInsertOffset -= 4;
   1432 
   1433   DEBUG(dbgs() << format(", adjusted to %#x", BaseInsertOffset)
   1434                << " la=" << LogAlign
   1435                << " kb=" << KnownBits
   1436                << " up=" << UPad << '\n');
   1437 
   1438   // This could point off the end of the block if we've already got constant
   1439   // pool entries following this block; only the last one is in the water list.
   1440   // Back past any possible branches (allow for a conditional and a maximally
   1441   // long unconditional).
   1442   if (BaseInsertOffset + 8 >= UserBBI.postOffset()) {
   1443     // Ensure BaseInsertOffset is larger than the offset of the instruction
   1444     // following UserMI so that the loop which searches for the split point
   1445     // iterates at least once.
   1446     BaseInsertOffset =
   1447         std::max(UserBBI.postOffset() - UPad - 8,
   1448                  UserOffset + TII->GetInstSizeInBytes(UserMI) + 1);
   1449     DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
   1450   }
   1451   unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
   1452     CPEMI->getOperand(2).getImm();
   1453   MachineBasicBlock::iterator MI = UserMI;
   1454   ++MI;
   1455   unsigned CPUIndex = CPUserIndex+1;
   1456   unsigned NumCPUsers = CPUsers.size();
   1457   MachineInstr *LastIT = nullptr;
   1458   for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
   1459        Offset < BaseInsertOffset;
   1460        Offset += TII->GetInstSizeInBytes(MI), MI = std::next(MI)) {
   1461     assert(MI != UserMBB->end() && "Fell off end of block");
   1462     if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
   1463       CPUser &U = CPUsers[CPUIndex];
   1464       if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
   1465         // Shift insertion point by one unit of alignment so it is within reach.
   1466         BaseInsertOffset -= 1u << LogAlign;
   1467         EndInsertOffset  -= 1u << LogAlign;
   1468       }
   1469       // This is overly conservative, as we don't account for CPEMIs being
   1470       // reused within the block, but it doesn't matter much.  Also assume CPEs
   1471       // are added in order with alignment padding.  We may eventually be able
   1472       // to pack the aligned CPEs better.
   1473       EndInsertOffset += U.CPEMI->getOperand(2).getImm();
   1474       CPUIndex++;
   1475     }
   1476 
   1477     // Remember the last IT instruction.
   1478     if (MI->getOpcode() == ARM::t2IT)
   1479       LastIT = MI;
   1480   }
   1481 
   1482   --MI;
   1483 
   1484   // Avoid splitting an IT block.
   1485   if (LastIT) {
   1486     unsigned PredReg = 0;
   1487     ARMCC::CondCodes CC = getITInstrPredicate(MI, PredReg);
   1488     if (CC != ARMCC::AL)
   1489       MI = LastIT;
   1490   }
   1491 
   1492   // We really must not split an IT block.
   1493   DEBUG(unsigned PredReg;
   1494         assert(!isThumb || getITInstrPredicate(MI, PredReg) == ARMCC::AL));
   1495 
   1496   NewMBB = splitBlockBeforeInstr(MI);
   1497 }
   1498 
   1499 /// handleConstantPoolUser - Analyze the specified user, checking to see if it
   1500 /// is out-of-range.  If so, pick up the constant pool value and move it some
   1501 /// place in-range.  Return true if we changed any addresses (thus must run
   1502 /// another pass of branch lengthening), false otherwise.
   1503 bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
   1504                                                 bool CloserWater) {
   1505   CPUser &U = CPUsers[CPUserIndex];
   1506   MachineInstr *UserMI = U.MI;
   1507   MachineInstr *CPEMI  = U.CPEMI;
   1508   unsigned CPI = getCombinedIndex(CPEMI);
   1509   unsigned Size = CPEMI->getOperand(2).getImm();
   1510   // Compute this only once, it's expensive.
   1511   unsigned UserOffset = getUserOffset(U);
   1512 
   1513   // See if the current entry is within range, or there is a clone of it
   1514   // in range.
   1515   int result = findInRangeCPEntry(U, UserOffset);
   1516   if (result == 1) return false;
   1517   else if (result == 2) return true;
   1518 
   1519   // No existing clone of this CPE is within range.
   1520   // We will be generating a new clone.  Get a UID for it.
   1521   unsigned ID = AFI->createPICLabelUId();
   1522 
   1523   // Look for water where we can place this CPE.
   1524   MachineBasicBlock *NewIsland = MF->CreateMachineBasicBlock();
   1525   MachineBasicBlock *NewMBB;
   1526   water_iterator IP;
   1527   if (findAvailableWater(U, UserOffset, IP, CloserWater)) {
   1528     DEBUG(dbgs() << "Found water in range\n");
   1529     MachineBasicBlock *WaterBB = *IP;
   1530 
   1531     // If the original WaterList entry was "new water" on this iteration,
   1532     // propagate that to the new island.  This is just keeping NewWaterList
   1533     // updated to match the WaterList, which will be updated below.
   1534     if (NewWaterList.erase(WaterBB))
   1535       NewWaterList.insert(NewIsland);
   1536 
   1537     // The new CPE goes before the following block (NewMBB).
   1538     NewMBB = &*++WaterBB->getIterator();
   1539   } else {
   1540     // No water found.
   1541     DEBUG(dbgs() << "No water found\n");
   1542     createNewWater(CPUserIndex, UserOffset, NewMBB);
   1543 
   1544     // splitBlockBeforeInstr adds to WaterList, which is important when it is
   1545     // called while handling branches so that the water will be seen on the
   1546     // next iteration for constant pools, but in this context, we don't want
   1547     // it.  Check for this so it will be removed from the WaterList.
   1548     // Also remove any entry from NewWaterList.
   1549     MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
   1550     IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
   1551     if (IP != WaterList.end())
   1552       NewWaterList.erase(WaterBB);
   1553 
   1554     // We are adding new water.  Update NewWaterList.
   1555     NewWaterList.insert(NewIsland);
   1556   }
   1557 
   1558   // Remove the original WaterList entry; we want subsequent insertions in
   1559   // this vicinity to go after the one we're about to insert.  This
   1560   // considerably reduces the number of times we have to move the same CPE
   1561   // more than once and is also important to ensure the algorithm terminates.
   1562   if (IP != WaterList.end())
   1563     WaterList.erase(IP);
   1564 
   1565   // Okay, we know we can put an island before NewMBB now, do it!
   1566   MF->insert(NewMBB->getIterator(), NewIsland);
   1567 
   1568   // Update internal data structures to account for the newly inserted MBB.
   1569   updateForInsertedWaterBlock(NewIsland);
   1570 
   1571   // Now that we have an island to add the CPE to, clone the original CPE and
   1572   // add it to the island.
   1573   U.HighWaterMark = NewIsland;
   1574   U.CPEMI = BuildMI(NewIsland, DebugLoc(), CPEMI->getDesc())
   1575                 .addImm(ID).addOperand(CPEMI->getOperand(1)).addImm(Size);
   1576   CPEntries[CPI].push_back(CPEntry(U.CPEMI, ID, 1));
   1577   ++NumCPEs;
   1578 
   1579   // Decrement the old entry, and remove it if refcount becomes 0.
   1580   decrementCPEReferenceCount(CPI, CPEMI);
   1581 
   1582   // Mark the basic block as aligned as required by the const-pool entry.
   1583   NewIsland->setAlignment(getCPELogAlign(U.CPEMI));
   1584 
   1585   // Increase the size of the island block to account for the new entry.
   1586   BBInfo[NewIsland->getNumber()].Size += Size;
   1587   adjustBBOffsetsAfter(&*--NewIsland->getIterator());
   1588 
   1589   // Finally, change the CPI in the instruction operand to be ID.
   1590   for (unsigned i = 0, e = UserMI->getNumOperands(); i != e; ++i)
   1591     if (UserMI->getOperand(i).isCPI()) {
   1592       UserMI->getOperand(i).setIndex(ID);
   1593       break;
   1594     }
   1595 
   1596   DEBUG(dbgs() << "  Moved CPE to #" << ID << " CPI=" << CPI
   1597         << format(" offset=%#x\n", BBInfo[NewIsland->getNumber()].Offset));
   1598 
   1599   return true;
   1600 }
   1601 
   1602 /// removeDeadCPEMI - Remove a dead constant pool entry instruction. Update
   1603 /// sizes and offsets of impacted basic blocks.
   1604 void ARMConstantIslands::removeDeadCPEMI(MachineInstr *CPEMI) {
   1605   MachineBasicBlock *CPEBB = CPEMI->getParent();
   1606   unsigned Size = CPEMI->getOperand(2).getImm();
   1607   CPEMI->eraseFromParent();
   1608   BBInfo[CPEBB->getNumber()].Size -= Size;
   1609   // All succeeding offsets have the current size value added in, fix this.
   1610   if (CPEBB->empty()) {
   1611     BBInfo[CPEBB->getNumber()].Size = 0;
   1612 
   1613     // This block no longer needs to be aligned.
   1614     CPEBB->setAlignment(0);
   1615   } else
   1616     // Entries are sorted by descending alignment, so realign from the front.
   1617     CPEBB->setAlignment(getCPELogAlign(CPEBB->begin()));
   1618 
   1619   adjustBBOffsetsAfter(CPEBB);
   1620   // An island has only one predecessor BB and one successor BB. Check if
   1621   // this BB's predecessor jumps directly to this BB's successor. This
   1622   // shouldn't happen currently.
   1623   assert(!BBIsJumpedOver(CPEBB) && "How did this happen?");
   1624   // FIXME: remove the empty blocks after all the work is done?
   1625 }
   1626 
   1627 /// removeUnusedCPEntries - Remove constant pool entries whose refcounts
   1628 /// are zero.
   1629 bool ARMConstantIslands::removeUnusedCPEntries() {
   1630   bool MadeChange = false;
   1631   for (unsigned i = 0, e = CPEntries.size(); i != e; ++i) {
   1632     std::vector<CPEntry> &CPEs = CPEntries[i];
   1633     for (unsigned j = 0, ee = CPEs.size(); j != ee; ++j) {
   1634       if (CPEs[j].RefCount == 0 && CPEs[j].CPEMI) {
   1635         removeDeadCPEMI(CPEs[j].CPEMI);
   1636         CPEs[j].CPEMI = nullptr;
   1637         MadeChange = true;
   1638       }
   1639     }
   1640   }
   1641   return MadeChange;
   1642 }
   1643 
   1644 /// isBBInRange - Returns true if the distance between the specified MI and
   1645 /// the specified BB fits in MI's displacement field.
   1646 bool ARMConstantIslands::isBBInRange(MachineInstr *MI, MachineBasicBlock *DestBB,
   1647                                      unsigned MaxDisp) {
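          // The displacement is measured from a PC that reads ahead of the
          // branch: 8 bytes on ARM, 4 bytes on Thumb.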
   1648   unsigned PCAdj      = isThumb ? 4 : 8;
   1649   unsigned BrOffset   = getOffsetOf(MI) + PCAdj;
   1650   unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
   1651 
   1652   DEBUG(dbgs() << "Branch to destination BB#" << DestBB->getNumber()
   1653                << " from BB#" << MI->getParent()->getNumber()
   1654                << " max delta=" << MaxDisp
   1655                << " from " << getOffsetOf(MI) << " to " << DestOffset
   1656                << " offset " << int(DestOffset-BrOffset) << "\t" << *MI);
   1657 
   1658   if (BrOffset <= DestOffset) {
   1659     // Branch before the Dest.
   1660     if (DestOffset-BrOffset <= MaxDisp)
   1661       return true;
   1662   } else {
   1663     if (BrOffset-DestOffset <= MaxDisp)
   1664       return true;
   1665   }
   1666   return false;
   1667 }
   1668 
   1669 /// fixupImmediateBr - Fix up an immediate branch whose destination is too far
   1670 /// away to fit in its displacement field.
   1671 bool ARMConstantIslands::fixupImmediateBr(ImmBranch &Br) {
   1672   MachineInstr *MI = Br.MI;
   1673   MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
   1674 
   1675   // Check to see if the DestBB is already in-range.
   1676   if (isBBInRange(MI, DestBB, Br.MaxDisp))
   1677     return false;
   1678 
   1679   if (!Br.isCond)
   1680     return fixupUnconditionalBr(Br);
   1681   return fixupConditionalBr(Br);
   1682 }
   1683 
   1684 /// fixupUnconditionalBr - Fix up an unconditional branch whose destination is
   1685 /// too far away to fit in its displacement field. If the LR register has been
   1686 /// spilled in the epilogue, then we can use BL to implement a far jump.
   1687 /// Otherwise, an intermediate branch would have to be inserted.
   1688 bool
   1689 ARMConstantIslands::fixupUnconditionalBr(ImmBranch &Br) {
   1690   MachineInstr *MI = Br.MI;
   1691   MachineBasicBlock *MBB = MI->getParent();
   1692   if (!isThumb1)
   1693     llvm_unreachable("fixupUnconditionalBr is Thumb1 only!");
   1694 
   1695   // Use BL to implement far jump.
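          // tBfar lowers to a Thumb1 BL pair whose 22-bit signed halfword
          // offset yields a range of (1 << 21) * 2 bytes in each direction.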
   1696   Br.MaxDisp = (1 << 21) * 2;
   1697   MI->setDesc(TII->get(ARM::tBfar));
   1698   BBInfo[MBB->getNumber()].Size += 2;
   1699   adjustBBOffsetsAfter(MBB);
   1700   HasFarJump = true;
   1701   ++NumUBrFixed;
   1702 
   1703   DEBUG(dbgs() << "  Changed B to long jump " << *MI);
   1704 
   1705   return true;
   1706 }
   1707 
   1708 /// fixupConditionalBr - Fix up a conditional branch whose destination is too
   1709 /// far away to fit in its displacement field. It is converted to an inverse
   1710 /// conditional branch + an unconditional branch to the destination.
   1711 bool
   1712 ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
   1713   MachineInstr *MI = Br.MI;
   1714   MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
   1715 
   1716   // Add an unconditional branch to the destination and invert the branch
   1717   // condition to jump over it:
   1718   // blt L1
   1719   // =>
   1720   // bge L2
   1721   // b   L1
   1722   // L2:
   1723   ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(1).getImm();
   1724   CC = ARMCC::getOppositeCondition(CC);
   1725   unsigned CCReg = MI->getOperand(2).getReg();
   1726 
   1727   // If the branch is at the end of its MBB and that has a fall-through block,
   1728   // direct the updated conditional branch to the fall-through block. Otherwise,
   1729   // split the MBB before the next instruction.
   1730   MachineBasicBlock *MBB = MI->getParent();
   1731   MachineInstr *BMI = &MBB->back();
   1732   bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
   1733 
   1734   ++NumCBrFixed;
   1735   if (BMI != MI) {
   1736     if (std::next(MachineBasicBlock::iterator(MI)) == std::prev(MBB->end()) &&
   1737         BMI->getOpcode() == Br.UncondBr) {
   1738       // Last MI in the BB is an unconditional branch. Can we simply invert the
   1739       // condition and swap destinations:
   1740       // beq L1
   1741       // b   L2
   1742       // =>
   1743       // bne L2
   1744       // b   L1
   1745       MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
   1746       if (isBBInRange(MI, NewDest, Br.MaxDisp)) {
   1747         DEBUG(dbgs() << "  Invert Bcc condition and swap its destination with "
   1748                      << *BMI);
   1749         BMI->getOperand(0).setMBB(DestBB);
   1750         MI->getOperand(0).setMBB(NewDest);
   1751         MI->getOperand(1).setImm(CC);
   1752         return true;
   1753       }
   1754     }
   1755   }
   1756 
   1757   if (NeedSplit) {
   1758     splitBlockBeforeInstr(MI);
   1759     // No need for the branch to the next block. We're adding an unconditional
   1760     // branch to the destination.
   1761     int delta = TII->GetInstSizeInBytes(&MBB->back());
   1762     BBInfo[MBB->getNumber()].Size -= delta;
   1763     MBB->back().eraseFromParent();
   1764     // BBInfo[SplitBB].Offset is wrong temporarily, fixed below
   1765   }
   1766   MachineBasicBlock *NextBB = &*++MBB->getIterator();
   1767 
   1768   DEBUG(dbgs() << "  Insert B to BB#" << DestBB->getNumber()
   1769                << " also invert condition and change dest. to BB#"
   1770                << NextBB->getNumber() << "\n");
   1771 
   1772   // Insert a new conditional branch and a new unconditional branch.
   1773   // Also update the ImmBranch as well as adding a new entry for the new branch.
   1774   BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
   1775     .addMBB(NextBB).addImm(CC).addReg(CCReg);
   1776   Br.MI = &MBB->back();
   1777   BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
   1778   if (isThumb)
   1779     BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
   1780             .addImm(ARMCC::AL).addReg(0);
   1781   else
   1782     BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
   1783   BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(&MBB->back());
   1784   unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
   1785   ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
   1786 
   1787   // Remove the old conditional branch.  It may or may not still be in MBB.
   1788   BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(MI);
   1789   MI->eraseFromParent();
   1790   adjustBBOffsetsAfter(MBB);
   1791   return true;
   1792 }
   1793 
   1794 /// undoLRSpillRestore - Remove Thumb push / pop instructions that only spill
   1795 /// LR / restore LR to pc. FIXME: This is done here because it's only possible
   1796 /// to do this if tBfar is not used.
   1797 bool ARMConstantIslands::undoLRSpillRestore() {
   1798   bool MadeChange = false;
   1799   for (unsigned i = 0, e = PushPopMIs.size(); i != e; ++i) {
   1800     MachineInstr *MI = PushPopMIs[i];
   1801     // First two operands are predicates.
   1802     if (MI->getOpcode() == ARM::tPOP_RET &&
   1803         MI->getOperand(2).getReg() == ARM::PC &&
   1804         MI->getNumExplicitOperands() == 3) {
   1805       // Create the new insn and copy the predicate from the old.
   1806       BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
   1807         .addOperand(MI->getOperand(0))
   1808         .addOperand(MI->getOperand(1));
   1809       MI->eraseFromParent();
   1810       MadeChange = true;
   1811     }
   1812   }
   1813   return MadeChange;
   1814 }
   1815 
   1816 // mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
   1817 // below may shrink MI.
   1818 bool
   1819 ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
   1820   switch(MI->getOpcode()) {
   1821     // optimizeThumb2Instructions.
   1822     case ARM::t2LEApcrel:
   1823     case ARM::t2LDRpci:
   1824     // optimizeThumb2Branches.
   1825     case ARM::t2B:
   1826     case ARM::t2Bcc:
   1827     case ARM::tBcc:
   1828     // optimizeThumb2JumpTables.
   1829     case ARM::t2BR_JT:
   1830       return true;
   1831   }
   1832   return false;
   1833 }
   1834 
   1835 bool ARMConstantIslands::optimizeThumb2Instructions() {
   1836   bool MadeChange = false;
   1837 
   1838   // Shrink ADR and LDR from constantpool.
   1839   for (unsigned i = 0, e = CPUsers.size(); i != e; ++i) {
   1840     CPUser &U = CPUsers[i];
   1841     unsigned Opcode = U.MI->getOpcode();
   1842     unsigned NewOpc = 0;
   1843     unsigned Scale = 1;
   1844     unsigned Bits = 0;
   1845     switch (Opcode) {
   1846     default: break;
   1847     case ARM::t2LEApcrel:
   1848       if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
   1849         NewOpc = ARM::tLEApcrel;
   1850         Bits = 8;
   1851         Scale = 4;
   1852       }
   1853       break;
   1854     case ARM::t2LDRpci:
   1855       if (isARMLowRegister(U.MI->getOperand(0).getReg())) {
   1856         NewOpc = ARM::tLDRpci;
   1857         Bits = 8;
   1858         Scale = 4;
   1859       }
   1860       break;
   1861     }
   1862 
   1863     if (!NewOpc)
   1864       continue;
   1865 
   1866     unsigned UserOffset = getUserOffset(U);
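            // tLEApcrel / tLDRpci take an 8-bit immediate scaled by 4, so the
            // maximum reachable offset is ((1 << 8) - 1) * 4 = 1020 bytes.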
   1867     unsigned MaxOffs = ((1 << Bits) - 1) * Scale;
   1868 
   1869     // Be conservative with inline asm.
   1870     if (!U.KnownAlignment)
   1871       MaxOffs -= 2;
   1872 
   1873     // FIXME: Check if offset is multiple of scale if scale is not 4.
   1874     if (isCPEntryInRange(U.MI, UserOffset, U.CPEMI, MaxOffs, false, true)) {
   1875       DEBUG(dbgs() << "Shrink: " << *U.MI);
   1876       U.MI->setDesc(TII->get(NewOpc));
   1877       MachineBasicBlock *MBB = U.MI->getParent();
   1878       BBInfo[MBB->getNumber()].Size -= 2;
   1879       adjustBBOffsetsAfter(MBB);
   1880       ++NumT2CPShrunk;
   1881       MadeChange = true;
   1882     }
   1883   }
   1884 
   1885   MadeChange |= optimizeThumb2Branches();
   1886   MadeChange |= optimizeThumb2JumpTables();
   1887   return MadeChange;
   1888 }
   1889 
   1890 bool ARMConstantIslands::optimizeThumb2Branches() {
   1891   bool MadeChange = false;
   1892 
   1893   // The order in which branches appear in ImmBranches is approximately their
   1894   // order within the function body. By visiting later branches first, we reduce
   1895   // the distance between earlier forward branches and their targets, making it
   1896   // more likely that the cbn?z optimization, which can only apply to forward
   1897   // branches, will succeed.
   1898   for (unsigned i = ImmBranches.size(); i != 0; --i) {
   1899     ImmBranch &Br = ImmBranches[i-1];
   1900     unsigned Opcode = Br.MI->getOpcode();
   1901     unsigned NewOpc = 0;
   1902     unsigned Scale = 1;
   1903     unsigned Bits = 0;
   1904     switch (Opcode) {
   1905     default: break;
   1906     case ARM::t2B:
   1907       NewOpc = ARM::tB;
   1908       Bits = 11;
   1909       Scale = 2;
   1910       break;
   1911     case ARM::t2Bcc: {
   1912       NewOpc = ARM::tBcc;
   1913       Bits = 8;
   1914       Scale = 2;
   1915       break;
   1916     }
   1917     }
   1918     if (NewOpc) {
   1919       unsigned MaxOffs = ((1 << (Bits-1))-1) * Scale;
   1920       MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
   1921       if (isBBInRange(Br.MI, DestBB, MaxOffs)) {
   1922         DEBUG(dbgs() << "Shrink branch: " << *Br.MI);
   1923         Br.MI->setDesc(TII->get(NewOpc));
   1924         MachineBasicBlock *MBB = Br.MI->getParent();
   1925         BBInfo[MBB->getNumber()].Size -= 2;
   1926         adjustBBOffsetsAfter(MBB);
   1927         ++NumT2BrShrunk;
   1928         MadeChange = true;
   1929       }
   1930     }
   1931 
   1932     Opcode = Br.MI->getOpcode();
   1933     if (Opcode != ARM::tBcc)
   1934       continue;
   1935 
   1936     // If the conditional branch doesn't kill CPSR, then CPSR can be liveout
   1937     // so this transformation is not safe.
   1938     if (!Br.MI->killsRegister(ARM::CPSR))
   1939       continue;
   1940 
   1941     NewOpc = 0;
   1942     unsigned PredReg = 0;
   1943     ARMCC::CondCodes Pred = getInstrPredicate(Br.MI, PredReg);
   1944     if (Pred == ARMCC::EQ)
   1945       NewOpc = ARM::tCBZ;
   1946     else if (Pred == ARMCC::NE)
   1947       NewOpc = ARM::tCBNZ;
   1948     if (!NewOpc)
   1949       continue;
   1950     MachineBasicBlock *DestBB = Br.MI->getOperand(0).getMBB();
   1951     // CBZ / CBNZ reach only forward, up to 126 bytes. Reduce the starting
   1952     // offset by 2 because the cmp will be eliminated.
   1953     unsigned BrOffset = getOffsetOf(Br.MI) + 4 - 2;
   1954     unsigned DestOffset = BBInfo[DestBB->getNumber()].Offset;
   1955     if (BrOffset < DestOffset && (DestOffset - BrOffset) <= 126) {
   1956       MachineBasicBlock::iterator CmpMI = Br.MI;
   1957       if (CmpMI != Br.MI->getParent()->begin()) {
   1958         --CmpMI;
   1959         if (CmpMI->getOpcode() == ARM::tCMPi8) {
   1960           unsigned Reg = CmpMI->getOperand(0).getReg();
   1961           Pred = getInstrPredicate(CmpMI, PredReg);
   1962           if (Pred == ARMCC::AL &&
   1963               CmpMI->getOperand(1).getImm() == 0 &&
   1964               isARMLowRegister(Reg)) {
   1965             MachineBasicBlock *MBB = Br.MI->getParent();
   1966             DEBUG(dbgs() << "Fold: " << *CmpMI << " and: " << *Br.MI);
   1967             MachineInstr *NewBR =
   1968               BuildMI(*MBB, CmpMI, Br.MI->getDebugLoc(), TII->get(NewOpc))
   1969               .addReg(Reg).addMBB(DestBB, Br.MI->getOperand(0).getTargetFlags());
   1970             CmpMI->eraseFromParent();
   1971             Br.MI->eraseFromParent();
   1972             Br.MI = NewBR;
   1973             BBInfo[MBB->getNumber()].Size -= 2;
   1974             adjustBBOffsetsAfter(MBB);
   1975             ++NumCBZ;
   1976             MadeChange = true;
   1977           }
   1978         }
   1979       }
   1980     }
   1981   }
   1982 
   1983   return MadeChange;
   1984 }
   1985 
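        /// Returns true if I is the scaled-index add (t2ADDrs) that computes
        /// EntryReg from the jump-table base in BaseReg.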
   1986 static bool isSimpleIndexCalc(MachineInstr &I, unsigned EntryReg,
   1987                               unsigned BaseReg) {
   1988   if (I.getOpcode() != ARM::t2ADDrs)
   1989     return false;
   1990 
   1991   if (I.getOperand(0).getReg() != EntryReg)
   1992     return false;
   1993 
   1994   if (I.getOperand(1).getReg() != BaseReg)
   1995     return false;
   1996 
   1997   // FIXME: what about CC and IdxReg?
   1998   return true;
   1999 }
   2000 
   2001 /// \brief While trying to form a TBB/TBH instruction, we may (if the table
   2002 /// doesn't immediately follow the BR_JT) need access to the start of the
   2003 /// jump-table. We know one instruction that produces such a register; this
   2004 /// function works out whether that definition can be preserved to the BR_JT,
   2005 /// possibly by removing an intervening addition (which is usually needed to
   2006 /// calculate the actual entry to jump to).
   2007 bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
   2008                                               MachineInstr *LEAMI,
   2009                                               unsigned &DeadSize,
   2010                                               bool &CanDeleteLEA,
   2011                                               bool &BaseRegKill) {
   2012   if (JumpMI->getParent() != LEAMI->getParent())
   2013     return false;
   2014 
   2015   // Now we hope that we have at least these instructions in the basic block:
   2016   //     BaseReg = t2LEA ...
   2017   //     [...]
   2018   //     EntryReg = t2ADDrs BaseReg, ...
   2019   //     [...]
   2020   //     t2BR_JT EntryReg
   2021   //
   2022   // We have to be very conservative about what we recognise here though. The
   2023   // main perturbing factors to watch out for are:
   2024   //    + Spills at any point in the chain: not direct problems but we would
   2025   //      expect a blocking Def of the spilled register so in practice what we
   2026   //      can do is limited.
   2027   //    + EntryReg == BaseReg: this is the one situation we should allow a Def
   2028   //      of BaseReg, but only if the t2ADDrs can be removed.
   2029   //    + Some instruction other than t2ADDrs computing the entry. Not seen in
   2030   //      the wild, but we should be careful.
   2031   unsigned EntryReg = JumpMI->getOperand(0).getReg();
   2032   unsigned BaseReg = LEAMI->getOperand(0).getReg();
   2033 
   2034   CanDeleteLEA = true;
   2035   BaseRegKill = false;
   2036   MachineInstr *RemovableAdd = nullptr;
   2037   MachineBasicBlock::iterator I(LEAMI);
   2038   for (++I; &*I != JumpMI; ++I) {
   2039     if (isSimpleIndexCalc(*I, EntryReg, BaseReg)) {
   2040       RemovableAdd = &*I;
   2041       break;
   2042     }
   2043 
   2044     for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
   2045       const MachineOperand &MO = I->getOperand(K);
   2046       if (!MO.isReg() || !MO.getReg())
   2047         continue;
   2048       if (MO.isDef() && MO.getReg() == BaseReg)
   2049         return false;
   2050       if (MO.isUse() && MO.getReg() == BaseReg) {
   2051         BaseRegKill = BaseRegKill || MO.isKill();
   2052         CanDeleteLEA = false;
   2053       }
   2054     }
   2055   }
   2056 
   2057   if (!RemovableAdd)
   2058     return true;
   2059 
   2060   // Check the add really is removable, and that nothing else in the block
   2061   // clobbers BaseReg.
   2062   for (++I; &*I != JumpMI; ++I) {
   2063     for (unsigned K = 0, E = I->getNumOperands(); K != E; ++K) {
   2064       const MachineOperand &MO = I->getOperand(K);
   2065       if (!MO.isReg() || !MO.getReg())
   2066         continue;
   2067       if (MO.isDef() && MO.getReg() == BaseReg)
   2068         return false;
   2069       if (MO.isUse() && MO.getReg() == EntryReg)
   2070         RemovableAdd = nullptr;
   2071     }
   2072   }
   2073 
   2074   if (RemovableAdd) {
   2075     RemovableAdd->eraseFromParent();
   2076     DeadSize += 4;
   2077   } else if (BaseReg == EntryReg) {
   2078     // The add wasn't removable, but clobbered the base for the TBB. So we can't
   2079     // preserve it.
   2080     return false;
   2081   }
   2082 
   2083   // We reached the end of the block without seeing another definition of
   2084   // BaseReg (except, possibly the t2ADDrs, which was removed). BaseReg can be
   2085   // used in the TBB/TBH if necessary.
   2086   return true;
   2087 }
   2088 
   2089 /// \brief Returns whether CPEMI is the first instruction in the block
   2090 /// immediately following JTMI (assumed to be a TBB or TBH terminator). If so,
   2091 /// we can switch the first register to PC and usually remove the address
   2092 /// calculation that preceded it.
   2093 static bool jumpTableFollowsTB(MachineInstr *JTMI, MachineInstr *CPEMI) {
   2094   MachineFunction::iterator MBB = JTMI->getParent()->getIterator();
   2095   MachineFunction *MF = MBB->getParent();
   2096   ++MBB;
   2097 
   2098   return MBB != MF->end() && MBB->begin() != MBB->end() &&
   2099          &*MBB->begin() == CPEMI;
   2100 }
   2101 
   2102 /// optimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
   2103 /// jumptables when it's possible.
   2104 bool ARMConstantIslands::optimizeThumb2JumpTables() {
   2105   bool MadeChange = false;
   2106 
   2107   // FIXME: After the tables are shrunk, can we get rid of some of the
   2108   // constantpool tables?
   2109   MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   2110   if (!MJTI) return false;
   2111 
   2112   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   2113   for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
   2114     MachineInstr *MI = T2JumpTables[i];
   2115     const MCInstrDesc &MCID = MI->getDesc();
   2116     unsigned NumOps = MCID.getNumOperands();
   2117     unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
   2118     MachineOperand JTOP = MI->getOperand(JTOpIdx);
   2119     unsigned JTI = JTOP.getIndex();
   2120     assert(JTI < JT.size());
   2121 
   2122     bool ByteOk = true;
   2123     bool HalfWordOk = true;
   2124     unsigned JTOffset = getOffsetOf(MI) + 4;
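            // TBB entries are byte offsets and TBH entries halfword offsets,
            // each scaled by 2, so they reach at most ((1<<8)-1)*2 and
            // ((1<<16)-1)*2 bytes forward of the table base.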
   2125     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
   2126     for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
   2127       MachineBasicBlock *MBB = JTBBs[j];
   2128       unsigned DstOffset = BBInfo[MBB->getNumber()].Offset;
   2129       // Negative offset is not ok. FIXME: We should change BB layout to make
   2130       // sure all the branches are forward.
   2131       if (ByteOk && (DstOffset - JTOffset) > ((1<<8)-1)*2)
   2132         ByteOk = false;
   2133       unsigned TBHLimit = ((1<<16)-1)*2;
   2134       if (HalfWordOk && (DstOffset - JTOffset) > TBHLimit)
   2135         HalfWordOk = false;
   2136       if (!ByteOk && !HalfWordOk)
   2137         break;
   2138     }
   2139 
   2140     if (!ByteOk && !HalfWordOk)
   2141       continue;
   2142 
   2143     MachineBasicBlock *MBB = MI->getParent();
   2144     if (!MI->getOperand(0).isKill()) // FIXME: needed now?
   2145       continue;
   2146     unsigned IdxReg = MI->getOperand(1).getReg();
   2147     bool IdxRegKill = MI->getOperand(1).isKill();
   2148 
   2149     CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
   2150     unsigned DeadSize = 0;
   2151     bool CanDeleteLEA = false;
   2152     bool BaseRegKill = false;
   2153     bool PreservedBaseReg =
   2154         preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
   2155 
   2156     if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
   2157       continue;
   2158 
   2159     DEBUG(dbgs() << "Shrink JT: " << *MI);
   2160     MachineInstr *CPEMI = User.CPEMI;
   2161     unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
   2162     MachineBasicBlock::iterator MI_JT = MI;
   2163     MachineInstr *NewJTMI =
   2164         BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
   2165             .addReg(User.MI->getOperand(0).getReg(),
   2166                     getKillRegState(BaseRegKill))
   2167             .addReg(IdxReg, getKillRegState(IdxRegKill))
   2168             .addJumpTableIndex(JTI, JTOP.getTargetFlags())
   2169             .addImm(CPEMI->getOperand(0).getImm());
   2170     DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": " << *NewJTMI);
   2171 
   2172     unsigned JTOpc = ByteOk ? ARM::JUMPTABLE_TBB : ARM::JUMPTABLE_TBH;
   2173     CPEMI->setDesc(TII->get(JTOpc));
   2174 
   2175     if (jumpTableFollowsTB(MI, User.CPEMI)) {
   2176       NewJTMI->getOperand(0).setReg(ARM::PC);
   2177       NewJTMI->getOperand(0).setIsKill(false);
   2178 
   2179       if (CanDeleteLEA)  {
   2180         User.MI->eraseFromParent();
   2181         DeadSize += 4;
   2182 
   2183         // The LEA was eliminated, the TBB instruction becomes the only new user
   2184         // of the jump table.
   2185         User.MI = NewJTMI;
   2186         User.MaxDisp = 4;
   2187         User.NegOk = false;
   2188         User.IsSoImm = false;
   2189         User.KnownAlignment = false;
   2190       } else {
   2191         // The LEA couldn't be eliminated, so we must add another CPUser to
   2192         // record the TBB or TBH use.
   2193         int CPEntryIdx = JumpTableEntryIndices[JTI];
   2194         auto &CPEs = CPEntries[CPEntryIdx];
   2195         auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
   2196           return E.CPEMI == User.CPEMI;
   2197         });
   2198         ++Entry->RefCount;
   2199         CPUsers.emplace_back(NewJTMI, User.CPEMI, 4, false, false);
   2200       }
   2201     }
   2202 
   2203     unsigned NewSize = TII->GetInstSizeInBytes(NewJTMI);
   2204     unsigned OrigSize = TII->GetInstSizeInBytes(MI);
   2205     MI->eraseFromParent();
   2206 
   2207     int Delta = OrigSize - NewSize + DeadSize;
   2208     BBInfo[MBB->getNumber()].Size -= Delta;
   2209     adjustBBOffsetsAfter(MBB);
   2210 
   2211     ++NumTBs;
   2212     MadeChange = true;
   2213   }
   2214 
   2215   return MadeChange;
   2216 }
   2217 
   2218 /// reorderThumb2JumpTables - Adjust the function's block layout to ensure that
   2219 /// jump tables always branch forwards, since tbb and tbh can only encode
   2219 /// unsigned (forward) offsets.
   2220 bool ARMConstantIslands::reorderThumb2JumpTables() {
   2221   bool MadeChange = false;
   2222 
   2223   MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
   2224   if (!MJTI) return false;
   2225 
   2226   const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
   2227   for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
   2228     MachineInstr *MI = T2JumpTables[i];
   2229     const MCInstrDesc &MCID = MI->getDesc();
   2230     unsigned NumOps = MCID.getNumOperands();
   2231     unsigned JTOpIdx = NumOps - (MI->isPredicable() ? 2 : 1);
   2232     MachineOperand JTOP = MI->getOperand(JTOpIdx);
   2233     unsigned JTI = JTOP.getIndex();
   2234     assert(JTI < JT.size());
   2235 
   2236     // We prefer if target blocks for the jump table come after the jump
   2237     // instruction so we can use TB[BH]. Loop through the target blocks
   2238     // and try to adjust them so that this holds.
   2239     int JTNumber = MI->getParent()->getNumber();
   2240     const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
   2241     for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
   2242       MachineBasicBlock *MBB = JTBBs[j];
   2243       int DTNumber = MBB->getNumber();
   2244 
   2245       if (DTNumber < JTNumber) {
   2246         // The destination precedes the switch. Try to move the block forward
   2247         // so we have a positive offset.
   2248         MachineBasicBlock *NewBB =
   2249           adjustJTTargetBlockForward(MBB, MI->getParent());
   2250         if (NewBB)
   2251           MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
   2252         MadeChange = true;
   2253       }
   2254     }
   2255   }
   2256 
   2257   return MadeChange;
   2258 }
   2259 
   2260 MachineBasicBlock *ARMConstantIslands::
   2261 adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
   2262   // If the destination block is terminated by an unconditional branch,
   2263   // try to move it; otherwise, create a new block following the jump
   2264   // table that branches back to the actual target. This is a very simple
   2265   // heuristic. FIXME: We can definitely improve it.
   2266   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
   2267   SmallVector<MachineOperand, 4> Cond;
   2268   SmallVector<MachineOperand, 4> CondPrior;
   2269   MachineFunction::iterator BBi = BB->getIterator();
   2270   MachineFunction::iterator OldPrior = std::prev(BBi);
   2271 
   2272   // If the block terminator isn't analyzable, don't try to move the block.
   2273   bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
   2274 
   2275   // If the block ends in an unconditional branch, move it. The prior block
   2276   // has to have an analyzable terminator for us to move this one. Be paranoid
   2277   // and make sure we're not trying to move the entry block of the function.
   2278   if (!B && Cond.empty() && BB != MF->begin() &&
   2279       !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
   2280     BB->moveAfter(JTBB);
   2281     OldPrior->updateTerminator();
   2282     BB->updateTerminator();
   2283     // Update numbering to account for the block being moved.
   2284     MF->RenumberBlocks();
   2285     ++NumJTMoved;
   2286     return nullptr;
   2287   }
   2288 
   2289   // Create a new MBB for the code after the jump BB.
   2290   MachineBasicBlock *NewBB =
   2291     MF->CreateMachineBasicBlock(JTBB->getBasicBlock());
   2292   MachineFunction::iterator MBBI = ++JTBB->getIterator();
   2293   MF->insert(MBBI, NewBB);
   2294 
   2295   // Add an unconditional branch from NewBB to BB.
   2296   // There doesn't seem to be meaningful DebugInfo available; this doesn't
   2297   // correspond directly to anything in the source.
   2298   assert(isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
   2299   BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
   2300           .addImm(ARMCC::AL).addReg(0);
   2301 
   2302   // Update internal data structures to account for the newly inserted MBB.
   2303   MF->RenumberBlocks(NewBB);
   2304 
   2305   // Update the CFG.
   2306   NewBB->addSuccessor(BB);
   2307   JTBB->replaceSuccessor(BB, NewBB);
   2308 
   2309   ++NumJTInserted;
   2310   return NewBB;
   2311 }
   2312