Home | History | Annotate | Download | only in ARM
      1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ----*- C++ -*-=//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains a pass that performs load / store related peephole
     11 // optimizations. This pass should be run after register allocation.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "arm-ldst-opt"
     16 #include "ARM.h"
     17 #include "ARMAddressingModes.h"
     18 #include "ARMBaseInstrInfo.h"
     19 #include "ARMMachineFunctionInfo.h"
     20 #include "ARMRegisterInfo.h"
     21 #include "llvm/DerivedTypes.h"
     22 #include "llvm/Function.h"
     23 #include "llvm/CodeGen/MachineBasicBlock.h"
     24 #include "llvm/CodeGen/MachineFunctionPass.h"
     25 #include "llvm/CodeGen/MachineInstr.h"
     26 #include "llvm/CodeGen/MachineInstrBuilder.h"
     27 #include "llvm/CodeGen/MachineRegisterInfo.h"
     28 #include "llvm/CodeGen/RegisterScavenging.h"
     29 #include "llvm/Target/TargetData.h"
     30 #include "llvm/Target/TargetInstrInfo.h"
     31 #include "llvm/Target/TargetMachine.h"
     32 #include "llvm/Target/TargetRegisterInfo.h"
     33 #include "llvm/Support/ErrorHandling.h"
     34 #include "llvm/ADT/DenseMap.h"
     35 #include "llvm/ADT/STLExtras.h"
     36 #include "llvm/ADT/SmallPtrSet.h"
     37 #include "llvm/ADT/SmallSet.h"
     38 #include "llvm/ADT/SmallVector.h"
     39 #include "llvm/ADT/Statistic.h"
     40 using namespace llvm;
     41 
     42 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
     43 STATISTIC(NumSTMGened , "Number of stm instructions generated");
     44 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
     45 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
     46 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
     47 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
     48 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
     49 STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
     50 STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
     51 STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
     52 STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
     53 
/// ARMLoadStoreOpt - Post-register-allocation pass that combines
/// load / store instructions to form ldm / stm instructions.
     56 
namespace {
  struct ARMLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached target hooks and per-function state; expected to be set up by
    // runOnMachineFunction before the private helpers run.
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    ARMFunctionInfo *AFI;
    RegScavenger *RS;
    bool isThumb2;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM load / store optimization pass";
    }

  private:
    /// MemOpQueueEntry - One candidate load / store: its immediate offset,
    /// the register transferred, whether that register is killed here, and
    /// the instruction's position within the basic block.
    struct MemOpQueueEntry {
      int Offset;
      unsigned Reg;
      bool isKill;
      unsigned Position;
      MachineBasicBlock::iterator MBBI;
      bool Merged;   // Set once this memop has been folded into a multiple.
      MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
                      MachineBasicBlock::iterator i)
        : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
    };
    typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
    typedef MemOpQueue::iterator MemOpQueueIter;

    // Build a single LDM/STM from the (register, kill) pairs in Regs.
    bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                  int Offset, unsigned Base, bool BaseKill, int Opcode,
                  ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
                  DebugLoc dl, SmallVector<std::pair<unsigned, bool>, 8> &Regs);
    // Call MergeOps on MemOps[memOpsBegin, memOpsEnd) and, on success,
    // update the MemOps queue and the Merges list.
    void MergeOpsUpdate(MachineBasicBlock &MBB,
                        MemOpQueue &MemOps,
                        unsigned memOpsBegin,
                        unsigned memOpsEnd,
                        unsigned insertAfter,
                        int Offset,
                        unsigned Base,
                        bool BaseKill,
                        int Opcode,
                        ARMCC::CondCodes Pred,
                        unsigned PredReg,
                        unsigned Scratch,
                        DebugLoc dl,
                        SmallVector<MachineBasicBlock::iterator, 4> &Merges);
    // Greedily merge runs of MemOps (starting at SIndex) that share Base.
    void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
                      int Opcode, unsigned Size,
                      ARMCC::CondCodes Pred, unsigned PredReg,
                      unsigned Scratch, MemOpQueue &MemOps,
                      SmallVector<MachineBasicBlock::iterator, 4> &Merges);

    void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator &MBBI);
    bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const TargetInstrInfo *TII,
                                  bool &Advance,
                                  MachineBasicBlock::iterator &I);
    bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   bool &Advance,
                                   MachineBasicBlock::iterator &I);
    bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  };
  char ARMLoadStoreOpt::ID = 0;
}
    130 
/// getLoadStoreMultipleOpcode - Map a single load / store opcode to the
/// corresponding load / store multiple opcode for the given addressing
/// submode.  Returns 0 when no such multiple exists (the VFP DB variants
/// only exist in base-updating form).  Note that the statistics counters
/// are bumped before the submode is validated, so they can slightly
/// overcount when 0 is returned and the caller abandons the merge.
static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
    break;
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
    break;
  // Thumb2 multiples only support IA and DB submodes.
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
    break;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
    break;
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
    break;
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
    break;
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
    break;
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
    break;
  }

  // Not reached: every case returns and the defaults are unreachable.
  return 0;
}
    208 
namespace llvm {
  namespace ARM_AM {

/// getLoadStoreMultipleSubMode - Return the addressing submode
/// (ia / ib / da / db) implied by the given load / store multiple opcode.
AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;

  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;

  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;

  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }

  // Not reached: every case returns and the default is unreachable.
  return ARM_AM::bad_am_submode;
}

  } // end namespace ARM_AM
} // end namespace llvm
    267 
    268 static bool isT2i32Load(unsigned Opc) {
    269   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
    270 }
    271 
    272 static bool isi32Load(unsigned Opc) {
    273   return Opc == ARM::LDRi12 || isT2i32Load(Opc);
    274 }
    275 
    276 static bool isT2i32Store(unsigned Opc) {
    277   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
    278 }
    279 
    280 static bool isi32Store(unsigned Opc) {
    281   return Opc == ARM::STRi12 || isT2i32Store(Opc);
    282 }
    283 
/// MergeOps - Create and insert a LDM or STM with Base as base register and
/// registers in Regs as the register operands that would be loaded / stored.
/// It returns true if the transformation is done.
bool
ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          int Offset, unsigned Base, bool BaseKill,
                          int Opcode, ARMCC::CondCodes Pred,
                          unsigned PredReg, unsigned Scratch, DebugLoc dl,
                          SmallVector<std::pair<unsigned, bool>, 8> &Regs) {
  // Only a single register to load / store. Don't bother.
  unsigned NumRegs = Regs.size();
  if (NumRegs <= 1)
    return false;

  // Pick the addressing submode that makes the run's first transfer land at
  // Base+Offset without needing any extra address arithmetic, if possible.
  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2;
  if (Offset == 4 && haveIBAndDA)
    Mode = ARM_AM::ib;
  else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    Mode = ARM_AM::da;
  else if (Offset == -4 * (int)NumRegs && isNotVFP)
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  else if (Offset != 0) {
    // Check if this is a supported opcode before we insert instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return false;

    unsigned NewBase;
    if (isi32Load(Opcode))
      // If it is a load, then just use one of the destination register to
      // use as the new base.
      NewBase = Regs[NumRegs-1].first;
    else {
      // Use the scratch register to use as a new base.
      NewBase = Scratch;
      if (NewBase == 0)
        return false;
    }
    int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    if (Offset < 0) {
      BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
      Offset = - Offset;
    }
    // The base adjustment must be encodable as a shifter-operand immediate.
    int ImmedOffset = isThumb2
      ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    if (ImmedOffset == -1)
      // FIXME: Try t2ADDri12 or t2SUBri12?
      return false;  // Probably not worth it then.

    BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
      .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
      .addImm(Pred).addReg(PredReg).addReg(0);
    Base = NewBase;
    BaseKill = true;  // New base is always killed right after its use.
  }

  bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
                Opcode == ARM::VLDRD);
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode) return false;
  // Emit the multiple: base register, predicate, then the register list.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);
  for (unsigned i = 0; i != NumRegs; ++i)
    MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
                     | getKillRegState(Regs[i].second));

  return true;
}
    363 
// MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
// success.
void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
                                     MemOpQueue &memOps,
                                     unsigned memOpsBegin, unsigned memOpsEnd,
                                     unsigned insertAfter, int Offset,
                                     unsigned Base, bool BaseKill,
                                     int Opcode,
                                     ARMCC::CondCodes Pred, unsigned PredReg,
                                     unsigned Scratch,
                                     DebugLoc dl,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  // First calculate which of the registers should be killed by the merged
  // instruction.
  const unsigned insertPos = memOps[insertAfter].Position;
  SmallSet<unsigned, 4> KilledRegs;
  DenseMap<unsigned, unsigned> Killer;
  // Scan the memops *outside* [memOpsBegin, memOpsEnd) that precede the
  // insertion point; if one of them kills a register, that kill flag must
  // migrate onto the merged instruction (which now becomes the last use).
  for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    if (i == memOpsBegin) {
      i = memOpsEnd;
      if (i == e)
        break;
    }
    if (memOps[i].Position < insertPos && memOps[i].isKill) {
      unsigned Reg = memOps[i].Reg;
      KilledRegs.insert(Reg);
      Killer[Reg] = i;
    }
  }

  // Build the (register, kill) list that MergeOps will turn into operands.
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    unsigned Reg = memOps[i].Reg;
    // If we are inserting the merged operation after an operation that
    // uses the same register, make sure to transfer any kill flag.
    bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    Regs.push_back(std::make_pair(Reg, isKill));
  }

  // Try to do the merge.
  MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
  ++Loc;
  if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
                Pred, PredReg, Scratch, dl, Regs))
    return;

  // Merge succeeded, update records.
  Merges.push_back(prior(Loc));
  for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    // Remove kill flags from any memops that come before insertPos.
    if (Regs[i-memOpsBegin].second) {
      unsigned Reg = Regs[i-memOpsBegin].first;
      if (KilledRegs.count(Reg)) {
        unsigned j = Killer[Reg];
        int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
        assert(Idx >= 0 && "Cannot find killing operand");
        memOps[j].MBBI->getOperand(Idx).setIsKill(false);
        memOps[j].isKill = false;
      }
      memOps[i].isKill = true;
    }
    // Erase the original single load / store.
    MBB.erase(memOps[i].MBBI);
    // Update this memop to refer to the merged instruction.
    // We may need to move kill flags again.
    memOps[i].Merged = true;
    memOps[i].MBBI = Merges.back();
    memOps[i].Position = insertPos;
  }
}
    433 
/// MergeLDR_STR - Merge a number of load / store instructions into one or more
/// load / store multiple instructions.  Walks MemOps starting at SIndex,
/// accumulating the longest run of memops with consecutive offsets (and, for
/// VFP, consecutive registers); when a memop breaks the run, the run so far
/// is merged and the routine recurses on the remainder.
void
ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
                          unsigned Base, int Opcode, unsigned Size,
                          ARMCC::CondCodes Pred, unsigned PredReg,
                          unsigned Scratch, MemOpQueue &MemOps,
                          SmallVector<MachineBasicBlock::iterator, 4> &Merges) {
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  int Offset = MemOps[SIndex].Offset;
  int SOffset = Offset;            // Starting offset of the current run.
  unsigned insertAfter = SIndex;   // Memop after which the merge is inserted.
  MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
  DebugLoc dl = Loc->getDebugLoc();
  const MachineOperand &PMO = Loc->getOperand(0);
  unsigned PReg = PMO.getReg();
  // Undef operands compare as UINT_MAX so they can never extend a run.
  unsigned PRegNum = PMO.isUndef() ? UINT_MAX
    : getARMRegisterNumbering(PReg);
  unsigned Count = 1;
  unsigned Limit = ~0U;

  // vldm / vstm limit are 32 for S variants, 16 for D variants.

  switch (Opcode) {
  default: break;
  case ARM::VSTRS:
    Limit = 32;
    break;
  case ARM::VSTRD:
    Limit = 16;
    break;
  case ARM::VLDRD:
    Limit = 16;
    break;
  case ARM::VLDRS:
    Limit = 32;
    break;
  }

  for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    int NewOffset = MemOps[i].Offset;
    const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    unsigned Reg = MO.getReg();
    unsigned RegNum = MO.isUndef() ? UINT_MAX
      : getARMRegisterNumbering(Reg);
    // Register numbers must be in ascending order. For VFP / NEON load and
    // store multiples, the registers must also be consecutive and within the
    // limit on the number of registers per instruction.
    if (Reg != ARM::SP &&
        NewOffset == Offset + (int)Size &&
        ((isNotVFP && RegNum > PRegNum) ||
         ((Count < Limit) && RegNum == PRegNum+1))) {
      Offset += Size;
      PRegNum = RegNum;
      ++Count;
    } else {
      // Can't merge this in. Try merge the earlier ones first.
      MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
                     Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
      MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
                   MemOps, Merges);
      return;
    }

    // Remember the latest-placed memop so the merge is inserted after it.
    if (MemOps[i].Position > MemOps[insertAfter].Position)
      insertAfter = i;
  }

  // The whole remaining queue formed one run; merge it in one go.
  bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
  MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
                 Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
  return;
}
    507 
    508 static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
    509                                        unsigned Bytes, unsigned Limit,
    510                                        ARMCC::CondCodes Pred, unsigned PredReg){
    511   unsigned MyPredReg = 0;
    512   if (!MI)
    513     return false;
    514   if (MI->getOpcode() != ARM::t2SUBri &&
    515       MI->getOpcode() != ARM::tSUBspi &&
    516       MI->getOpcode() != ARM::SUBri)
    517     return false;
    518 
    519   // Make sure the offset fits in 8 bits.
    520   if (Bytes == 0 || (Limit && Bytes >= Limit))
    521     return false;
    522 
    523   unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
    524   return (MI->getOperand(0).getReg() == Base &&
    525           MI->getOperand(1).getReg() == Base &&
    526           (MI->getOperand(2).getImm()*Scale) == Bytes &&
    527           llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
    528           MyPredReg == PredReg);
    529 }
    530 
    531 static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
    532                                        unsigned Bytes, unsigned Limit,
    533                                        ARMCC::CondCodes Pred, unsigned PredReg){
    534   unsigned MyPredReg = 0;
    535   if (!MI)
    536     return false;
    537   if (MI->getOpcode() != ARM::t2ADDri &&
    538       MI->getOpcode() != ARM::tADDspi &&
    539       MI->getOpcode() != ARM::ADDri)
    540     return false;
    541 
    542   if (Bytes == 0 || (Limit && Bytes >= Limit))
    543     // Make sure the offset fits in 8 bits.
    544     return false;
    545 
    546   unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
    547   return (MI->getOperand(0).getReg() == Base &&
    548           MI->getOperand(1).getReg() == Base &&
    549           (MI->getOperand(2).getImm()*Scale) == Bytes &&
    550           llvm::getInstrPredicate(MI, MyPredReg) == Pred &&
    551           MyPredReg == PredReg);
    552 }
    553 
/// getLSMultipleTransferSize - Return the number of bytes of memory the
/// given load / store (or load / store multiple) instruction transfers,
/// or 0 for opcodes this pass does not recognize.
static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  // Single 32-bit integer or single-precision VFP transfer: 4 bytes.
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  // Single double-precision VFP transfer: 8 bytes.
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  // Multiples: the register list is variadic, so its length is the actual
  // operand count beyond the descriptor's fixed operand count (the +1
  // presumably accounts for a list operand the descriptor already counts —
  // confirm against the instruction definitions).
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}
    589 
/// getUpdatingLSMultipleOpcode - Map a load / store multiple opcode to its
/// base-updating (writeback) form for the given addressing submode.
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
    break;
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
    break;
  // Thumb2 multiples only have IA and DB writeback forms.
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
    break;
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
    break;
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
    break;
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
    break;
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
    break;
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
    break;
  }

  // Not reached: every case returns and the defaults are unreachable.
  return 0;
}
    666 
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
/// =>
/// stmia rn!, <ra, rb, rc>
///
/// rn := rn - 4 * 3;
/// ldmia rn, <ra, rb, rc>
/// =>
/// ldmdb rn!, <ra, rb, rc>
bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(0).getReg();
  bool BaseKill = MI->getOperand(0).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();

  // Can't use an updating ld/st if the base register is also a dest
  // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    if (MI->getOperand(i).getReg() == Base)
      return false;

  bool DoMerge = false;
  ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);

  // Try merging with the previous instruction: a decrement of the base by
  // exactly the transfer size converts ia->db and ib->da.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    // Skip debug values when looking for the adjacent instruction.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (Mode == ARM_AM::ia &&
        isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::db;
      DoMerge = true;
    } else if (Mode == ARM_AM::ib &&
               isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
      Mode = ARM_AM::da;
      DoMerge = true;
    }
    if (DoMerge)
      MBB.erase(PrevMBBI);
  }

  // Try merging with the next instruction: a matching post-adjustment keeps
  // the submode and simply becomes the writeback.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
        isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
               isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      // If we are erasing the caller's iterator, tell it to advance so it
      // does not dereference the dead instruction.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Rebuild the multiple in its writeback form; operand 0 becomes the
  // written-back base register definition.
  unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    .addReg(Base, getDefRegState(true)) // WB base register
    .addReg(Base, getKillRegState(BaseKill))
    .addImm(Pred).addReg(PredReg);

  // Transfer the rest of operands.
  for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    MIB.addOperand(MI->getOperand(OpNum));

  // Transfer memoperands.
  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());

  MBB.erase(MBBI);
  return true;
}
    761 
/// getPreIndexedLoadStoreOpcode - Return the pre-indexed form of the given
/// single load / store opcode.  The VFP loads / stores are mapped to the
/// writeback load / store multiple forms instead.
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE;
  case ARM::STRi12:
    return ARM::STR_PRE;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
  // Not reached.
  return 0;
}
    787 
    788 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
    789                                               ARM_AM::AddrOpc Mode) {
    790   switch (Opc) {
    791   case ARM::LDRi12:
    792     return ARM::LDR_POST;
    793   case ARM::STRi12:
    794     return ARM::STR_POST;
    795   case ARM::VLDRS:
    796     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    797   case ARM::VLDRD:
    798     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    799   case ARM::VSTRS:
    800     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    801   case ARM::VSTRD:
    802     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    803   case ARM::t2LDRi8:
    804   case ARM::t2LDRi12:
    805     return ARM::t2LDR_POST;
    806   case ARM::t2STRi8:
    807   case ARM::t2STRi12:
    808     return ARM::t2STR_POST;
    809   default: llvm_unreachable("Unhandled opcode!");
    810   }
    811   return 0;
    812 }
    813 
/// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
/// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
///
///   add r0, r0, #4  ;  ldr r1, [r0]      =>  ldr r1, [r0, #4]!
///   ldr r1, [r0]    ;  add r0, r0, #4    =>  ldr r1, [r0], #4
///
/// On success the add/sub and the original load/store are both erased and a
/// single pre- or post-indexed instruction is emitted in their place.
/// \p Advance and \p I are the caller's iteration state; they are bumped
/// when the instruction being erased is the one the caller would visit next.
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               const TargetInstrInfo *TII,
                                               bool &Advance,
                                               MachineBasicBlock::iterator &I) {
  // Operand layout of the simple forms handled here: 0 = transfer register,
  // 1 = base register, 2 = immediate offset.
  MachineInstr *MI = MBBI;
  unsigned Base = MI->getOperand(1).getReg();
  bool BaseKill = MI->getOperand(1).isKill();
  unsigned Bytes = getLSMultipleTransferSize(MI);
  int Opcode = MI->getOpcode();
  DebugLoc dl = MI->getDebugLoc();
  // isAM5: VFP load/store (addrmode5); isAM2: ARM-mode LDR/STR (addrmode2).
  // Anything else handled here is a Thumb2 i8/i12 form.
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Only a zero existing offset can be folded; otherwise the merged
  // writeback would not reproduce the original address arithmetic.
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
  // Can't do the merge if the destination register is the same as the would-be
  // writeback register.
  if (isLd && MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);
  bool DoMerge = false;
  ARM_AM::AddrOpc AddSub = ARM_AM::add;
  unsigned NewOpc = 0;
  // Immediate range of the resulting indexed instruction:
  // AM2 - 12 bits, thumb2 - 8 bits.
  // (AM5 passes 0; presumably isMatchingIncrement/Decrement treat that as
  // "increment must exactly equal the transfer size" -- their definitions
  // are not visible here, confirm before relying on this.)
  unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);

  // Try merging with the previous instruction. A preceding matching add/sub
  // of the base folds into the pre-indexed form.
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  if (MBBI != BeginMBBI) {
    MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    // Skip over debug values so they don't block the match.
    while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
      --PrevMBBI;
    if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (!isAM5 &&
               isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
      MBB.erase(PrevMBBI);
    }
  }

  // Try merging with the next instruction. A trailing matching add/sub of
  // the base folds into the post-indexed form.
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (!DoMerge && MBBI != EndMBBI) {
    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
      ++NextMBBI;
    if (!isAM5 &&
        isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
      DoMerge = true;
      AddSub = ARM_AM::sub;
    } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
      DoMerge = true;
    }
    if (DoMerge) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
      // If we are about to erase the instruction the caller would visit
      // next, advance the caller's iterator past it first.
      if (NextMBBI == I) {
        Advance = true;
        ++I;
      }
      MBB.erase(NextMBBI);
    }
  }

  if (!DoMerge)
    return false;

  // Encode the offset operand of the new instruction. AM5 forms carry no
  // immediate (the writeback amount is implied by the register count).
  unsigned Offset = 0;
  if (isAM2)
    Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  else if (!isAM5)
    Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;

  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2)
      // LDR_PRE, LDR_POST,
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineOperand &MO = MI->getOperand(0);
    if (isAM2)
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    else
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  }
  // The original load/store has been replaced; remove it.
  MBB.erase(MBBI);

  return true;
}
    941 
    942 /// isMemoryOp - Returns true if instruction is a memory operation that this
    943 /// pass is capable of operating on.
    944 static bool isMemoryOp(const MachineInstr *MI) {
    945   // When no memory operands are present, conservatively assume unaligned,
    946   // volatile, unfoldable.
    947   if (!MI->hasOneMemOperand())
    948     return false;
    949 
    950   const MachineMemOperand *MMO = *MI->memoperands_begin();
    951 
    952   // Don't touch volatile memory accesses - we may be changing their order.
    953   if (MMO->isVolatile())
    954     return false;
    955 
    956   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    957   // not.
    958   if (MMO->getAlignment() < 4)
    959     return false;
    960 
    961   // str <undef> could probably be eliminated entirely, but for now we just want
    962   // to avoid making a mess of it.
    963   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
    964   if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
    965       MI->getOperand(0).isUndef())
    966     return false;
    967 
    968   // Likewise don't mess with references to undefined addresses.
    969   if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
    970       MI->getOperand(1).isUndef())
    971     return false;
    972 
    973   int Opcode = MI->getOpcode();
    974   switch (Opcode) {
    975   default: break;
    976   case ARM::VLDRS:
    977   case ARM::VSTRS:
    978     return MI->getOperand(1).isReg();
    979   case ARM::VLDRD:
    980   case ARM::VSTRD:
    981     return MI->getOperand(1).isReg();
    982   case ARM::LDRi12:
    983   case ARM::STRi12:
    984   case ARM::t2LDRi8:
    985   case ARM::t2LDRi12:
    986   case ARM::t2STRi8:
    987   case ARM::t2STRi12:
    988     return MI->getOperand(1).isReg();
    989   }
    990   return false;
    991 }
    992 
    993 /// AdvanceRS - Advance register scavenger to just before the earliest memory
    994 /// op that is being merged.
    995 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
    996   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
    997   unsigned Position = MemOps[0].Position;
    998   for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
    999     if (MemOps[i].Position < Position) {
   1000       Position = MemOps[i].Position;
   1001       Loc = MemOps[i].MBBI;
   1002     }
   1003   }
   1004 
   1005   if (Loc != MBB.begin())
   1006     RS->forward(prior(Loc));
   1007 }
   1008 
   1009 static int getMemoryOpOffset(const MachineInstr *MI) {
   1010   int Opcode = MI->getOpcode();
   1011   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
   1012   unsigned NumOperands = MI->getDesc().getNumOperands();
   1013   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
   1014 
   1015   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
   1016       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
   1017       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
   1018       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
   1019     return OffField;
   1020 
   1021   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
   1022     : ARM_AM::getAM5Offset(OffField) * 4;
   1023   if (isAM3) {
   1024     if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
   1025       Offset = -Offset;
   1026   } else {
   1027     if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
   1028       Offset = -Offset;
   1029   }
   1030   return Offset;
   1031 }
   1032 
   1033 static void InsertLDR_STR(MachineBasicBlock &MBB,
   1034                           MachineBasicBlock::iterator &MBBI,
   1035                           int Offset, bool isDef,
   1036                           DebugLoc dl, unsigned NewOpc,
   1037                           unsigned Reg, bool RegDeadKill, bool RegUndef,
   1038                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
   1039                           bool OffKill, bool OffUndef,
   1040                           ARMCC::CondCodes Pred, unsigned PredReg,
   1041                           const TargetInstrInfo *TII, bool isT2) {
   1042   if (isDef) {
   1043     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
   1044                                       TII->get(NewOpc))
   1045       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
   1046       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
   1047     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   1048   } else {
   1049     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
   1050                                       TII->get(NewOpc))
   1051       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
   1052       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
   1053     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   1054   }
   1055 }
   1056 
/// FixInvalidRegPairOp - LDRD/STRD require an even/odd register pair. When
/// an instruction carries an illegal pair, rewrite it either as a single
/// LDM/STM (ascending registers, zero offset) or as two separate word-sized
/// loads/stores. Returns true (and repositions MBBI on the first new
/// instruction) when a rewrite happened.
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
    unsigned EvenReg = MI->getOperand(0).getReg();
    unsigned OddReg  = MI->getOperand(1).getReg();
    // Dwarf register numbers are used for the even/odd and adjacency tests;
    // presumably they mirror the architectural GPR numbering here -- confirm
    // against the ARM register descriptions if this is changed.
    unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
    unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
    // A legal even/odd adjacent pair needs no fixing.
    if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)
      return false;

    MachineBasicBlock::iterator NewBBI = MBBI;
    bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
    bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
    // For loads the transfer regs are defs (track dead); for stores they are
    // uses (track kill).
    bool EvenDeadKill = isLd ?
      MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
    bool EvenUndef = MI->getOperand(0).isUndef();
    bool OddDeadKill  = isLd ?
      MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
    bool OddUndef = MI->getOperand(1).isUndef();
    const MachineOperand &BaseOp = MI->getOperand(2);
    unsigned BaseReg = BaseOp.getReg();
    bool BaseKill = BaseOp.isKill();
    bool BaseUndef = BaseOp.isUndef();
    // Only the ARM-mode form has a separate offset operand (operand 3).
    bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
    bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
    int OffImm = getMemoryOpOffset(MI);
    unsigned PredReg = 0;
    ARMCC::CondCodes Pred = llvm::getInstrPredicate(MI, PredReg);

    if (OddRegNum > EvenRegNum && OffImm == 0) {
      // Ascending register numbers and no offset. It's safe to change it to a
      // ldm or stm.
      unsigned NewOpc = (isLd)
        ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
        : (isT2 ? ARM::t2STMIA : ARM::STMIA);
      if (isLd) {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
          .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
        ++NumLDRD2LDM;
      } else {
        BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
          .addReg(BaseReg, getKillRegState(BaseKill))
          .addImm(Pred).addReg(PredReg)
          .addReg(EvenReg,
                  getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
          .addReg(OddReg,
                  getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
        ++NumSTRD2STM;
      }
      NewBBI = llvm::prior(MBBI);
    } else {
      // Split into two instructions.
      unsigned NewOpc = (isLd)
        ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
        : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
      DebugLoc dl = MBBI->getDebugLoc();
      // If this is a load and base register is killed, it may have been
      // re-defed by the load, make sure the first load does not clobber it.
      if (isLd &&
          (BaseKill || OffKill) &&
          (TRI->regsOverlap(EvenReg, BaseReg))) {
        assert(!TRI->regsOverlap(OddReg, BaseReg));
        // Emit the odd (higher-address) load first so the even load, which
        // overwrites the base, comes last.
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, false,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, false,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      } else {
        if (OddReg == EvenReg && EvenDeadKill) {
          // If the two source operands are the same, the kill marker is
          // probably on the first one. e.g.
          // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
          EvenDeadKill = false;
          OddDeadKill = true;
        }
        // Normal order: even (lower address) first, odd second; the base
        // kill marker goes on the last use.
        InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                      EvenReg, EvenDeadKill, EvenUndef,
                      BaseReg, false, BaseUndef, false, OffUndef,
                      Pred, PredReg, TII, isT2);
        NewBBI = llvm::prior(MBBI);
        InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc,
                      OddReg, OddDeadKill, OddUndef,
                      BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                      Pred, PredReg, TII, isT2);
      }
      if (isLd)
        ++NumLDRD2LDR;
      else
        ++NumSTRD2STR;
    }

    // Remove the original pair op and resume scanning at the first
    // replacement instruction.
    MBB.erase(MI);
    MBBI = NewBBI;
    return true;
  }
  return false;
}
   1164 
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
///
/// The block is scanned linearly while MemOps accumulates loads/stores that
/// share an opcode, base register and predicate (kept sorted by offset).
/// Whenever the chain breaks -- different base/opcode/predicate, a
/// clobbered base, a non-memory instruction, or end of block -- the queued
/// ops are merged via MergeLDR_STR and base-update folding is attempted on
/// both the merged and the leftover instructions.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  unsigned NumMerges = 0;
  unsigned NumMemOps = 0;
  MemOpQueue MemOps;
  // CurrBase == 0 means no chain is currently open.
  unsigned CurrBase = 0;
  int CurrOpc = -1;
  unsigned CurrSize = 0;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned CurrPredReg = 0;
  // Monotonically increasing index recorded per queue entry, used by
  // AdvanceRS to find the earliest instruction of the chain.
  unsigned Position = 0;
  SmallVector<MachineBasicBlock::iterator,4> Merges;

  RS->enterBasicBlock(&MBB);
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    // Illegal LDRD/STRD register pairs are rewritten first; the rewrite
    // repositions MBBI, so restart the loop body on the new instruction.
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;

    bool Advance  = false;
    bool TryMerge = false;
    bool Clobber  = false;

    bool isMemOp = isMemoryOp(MBBI);
    if (isMemOp) {
      int Opcode = MBBI->getOpcode();
      unsigned Size = getLSMultipleTransferSize(MBBI);
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      bool isKill = MO.isDef() ? false : MO.isKill();
      unsigned Base = MBBI->getOperand(1).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = llvm::getInstrPredicate(MBBI, PredReg);
      int Offset = getMemoryOpOffset(MBBI);
      // Watch out for:
      // r4 := ldr [r5]
      // r5 := ldr [r5, #4]
      // r6 := ldr [r5, #8]
      //
      // The second ldr has effectively broken the chain even though it
      // looks like the later ldr(s) use the same base register. Try to
      // merge the ldr's so far, including this one. But don't try to
      // combine the following ldr(s).
      Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
      if (CurrBase == 0 && !Clobber) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc  = Opcode;
        CurrSize = Size;
        CurrPred = Pred;
        CurrPredReg = PredReg;
        MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
        ++NumMemOps;
        Advance = true;
      } else {
        if (Clobber) {
          TryMerge = true;
          Advance = true;
        }

        if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
          // No need to match PredReg.
          // Continue adding to the queue. The queue is kept sorted by
          // offset; the common case appends at the back.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
                                             Position, MBBI));
            ++NumMemOps;
            Advance = true;
          } else {
            // Otherwise insert in sorted position, unless an entry with
            // the same offset already exists (a collision -- not mergeable).
            for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
                 I != E; ++I) {
              if (Offset < I->Offset) {
                MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
                                                 Position, MBBI));
                ++NumMemOps;
                Advance = true;
                break;
              } else if (Offset == I->Offset) {
                // Collision! This can't be merged!
                break;
              }
            }
          }
        }
      }
    }

    // Debug values are transparent: step over them without breaking the
    // chain (unless the block ends).
    if (MBBI->isDebugValue()) {
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else if (Advance) {
      ++Position;
      ++MBBI;
      if (MBBI == E)
        // Reach the end of the block, try merging the memory instructions.
        TryMerge = true;
    } else
      TryMerge = true;

    if (TryMerge) {
      if (NumMemOps > 1) {
        // Try to find a free register to use as a new base in case it's needed.
        // First advance to the instruction just before the start of the chain.
        AdvanceRS(MBB, MemOps);
        // Find a scratch register.
        unsigned Scratch = RS->FindUnusedReg(ARM::GPRRegisterClass);
        // Process the load / store instructions.
        RS->forward(prior(MBBI));

        // Merge ops.
        Merges.clear();
        MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
                     CurrPred, CurrPredReg, Scratch, MemOps, Merges);

        // Try folding preceding/trailing base inc/dec into the generated
        // LDM/STM ops.
        for (unsigned i = 0, e = Merges.size(); i < e; ++i)
          if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
            ++NumMerges;
        NumMerges += Merges.size();

        // Try folding preceding/trailing base inc/dec into those load/store
        // that were not merged to form LDM/STM ops.
        for (unsigned i = 0; i != NumMemOps; ++i)
          if (!MemOps[i].Merged)
            if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
              ++NumMerges;

        // RS may be pointing to an instruction that's deleted.
        RS->skipTo(prior(MBBI));
      } else if (NumMemOps == 1) {
        // Try folding preceding/trailing base inc/dec into the single
        // load/store.
        if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
          ++NumMerges;
          RS->forward(prior(MBBI));
        }
      }

      // Reset the chain state for the next scan.
      CurrBase = 0;
      CurrOpc = -1;
      CurrSize = 0;
      CurrPred = ARMCC::AL;
      CurrPredReg = 0;
      if (NumMemOps) {
        MemOps.clear();
        NumMemOps = 0;
      }

      // If iterator hasn't been advanced and this is not a memory op, skip it.
      // It can't start a new chain anyway.
      if (!Advance && !isMemOp && MBBI != E) {
        ++Position;
        ++MBBI;
      }
    }
  }
  return NumMerges > 0;
}
   1327 
/// MergeReturnIntoLDM - If this is an exit BB, try merging the return ops
/// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
/// directly restore the value of LR into pc.
///   ldmfd sp!, {..., lr}
///   bx lr
/// or
///   ldmfd sp!, {..., lr}
///   mov pc, lr
/// =>
///   ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  if (MBB.empty()) return false;

  // NOTE(review): assumes a non-empty block has at least one non-debug
  // instruction so getLastNonDebugInstr yields a valid instruction --
  // confirm that contract before relying on it here.
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineInstr *PrevMI = prior(MBBI);
    unsigned Opcode = PrevMI->getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      // LR must be the last register in the restore list to be rewritable
      // into PC.
      MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      // Only the increment-after form has a _RET counterpart.
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
      // Rewrite the restore in place: LR becomes PC, and the implicit
      // operands of the return (if any) are carried over.
      PrevMI->setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI->copyImplicitOps(&*MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
   1366 
   1367 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   1368   const TargetMachine &TM = Fn.getTarget();
   1369   AFI = Fn.getInfo<ARMFunctionInfo>();
   1370   TII = TM.getInstrInfo();
   1371   TRI = TM.getRegisterInfo();
   1372   RS = new RegScavenger();
   1373   isThumb2 = AFI->isThumb2Function();
   1374 
   1375   bool Modified = false;
   1376   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
   1377        ++MFI) {
   1378     MachineBasicBlock &MBB = *MFI;
   1379     Modified |= LoadStoreMultipleOpti(MBB);
   1380     if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
   1381       Modified |= MergeReturnIntoLDM(MBB);
   1382   }
   1383 
   1384   delete RS;
   1385   return Modified;
   1386 }
   1387 
   1388 
/// ARMPreAllocLoadStoreOpt - Pre-register-allocation pass that moves
/// loads / stores from consecutive locations closer together to make it
/// more likely they will be combined later.
   1392 
namespace {
  /// ARMPreAllocLoadStoreOpt - Machine function pass, run before register
  /// allocation, that reschedules loads/stores of consecutive locations so
  /// they sit next to each other and can later be combined (e.g. into
  /// LDRD/STRD or LDM/STM).
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    // Cached per-run target hooks and function state, set up in
    // runOnMachineFunction.
    const TargetData *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    virtual bool runOnMachineFunction(MachineFunction &Fn);

    virtual const char *getPassName() const {
      return "ARM pre- register allocation load / store optimization pass";
    }

  private:
    // CanFormLdStDWord - Check whether Op0/Op1 can be combined into a
    // doubleword load/store; fills in the new opcode and operand details.
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    // RescheduleOps - Move the queued loads (or stores) on the same base
    // next to each other when safe and profitable.
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVector<MachineInstr*, 4> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    // RescheduleLoadStoreInstrs - Per-block driver for the rescheduling.
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}
   1426 
   1427 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   1428   TD  = Fn.getTarget().getTargetData();
   1429   TII = Fn.getTarget().getInstrInfo();
   1430   TRI = Fn.getTarget().getRegisterInfo();
   1431   STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
   1432   MRI = &Fn.getRegInfo();
   1433   MF  = &Fn;
   1434 
   1435   bool Modified = false;
   1436   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
   1437        ++MFI)
   1438     Modified |= RescheduleLoadStoreInstrs(MFI);
   1439 
   1440   return Modified;
   1441 }
   1442 
   1443 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
   1444                                       MachineBasicBlock::iterator I,
   1445                                       MachineBasicBlock::iterator E,
   1446                                       SmallPtrSet<MachineInstr*, 4> &MemOps,
   1447                                       SmallSet<unsigned, 4> &MemRegs,
   1448                                       const TargetRegisterInfo *TRI) {
   1449   // Are there stores / loads / calls between them?
   1450   // FIXME: This is overly conservative. We should make use of alias information
   1451   // some day.
   1452   SmallSet<unsigned, 4> AddedRegPressure;
   1453   while (++I != E) {
   1454     if (I->isDebugValue() || MemOps.count(&*I))
   1455       continue;
   1456     const MCInstrDesc &MCID = I->getDesc();
   1457     if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
   1458       return false;
   1459     if (isLd && MCID.mayStore())
   1460       return false;
   1461     if (!isLd) {
   1462       if (MCID.mayLoad())
   1463         return false;
   1464       // It's not safe to move the first 'str' down.
   1465       // str r1, [r0]
   1466       // strh r5, [r0]
   1467       // str r4, [r0, #+4]
   1468       if (MCID.mayStore())
   1469         return false;
   1470     }
   1471     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
   1472       MachineOperand &MO = I->getOperand(j);
   1473       if (!MO.isReg())
   1474         continue;
   1475       unsigned Reg = MO.getReg();
   1476       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
   1477         return false;
   1478       if (Reg != Base && !MemRegs.count(Reg))
   1479         AddedRegPressure.insert(Reg);
   1480     }
   1481   }
   1482 
   1483   // Estimate register pressure increase due to the transformation.
   1484   if (MemRegs.size() <= 4)
   1485     // Ok if we are moving small number of instructions.
   1486     return true;
   1487   return AddedRegPressure.size() <= MemRegs.size() * 2;
   1488 }
   1489 
   1490 bool
   1491 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   1492                                           DebugLoc &dl,
   1493                                           unsigned &NewOpc, unsigned &EvenReg,
   1494                                           unsigned &OddReg, unsigned &BaseReg,
   1495                                           int &Offset, unsigned &PredReg,
   1496                                           ARMCC::CondCodes &Pred,
   1497                                           bool &isT2) {
   1498   // Make sure we're allowed to generate LDRD/STRD.
   1499   if (!STI->hasV5TEOps())
   1500     return false;
   1501 
   1502   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
   1503   unsigned Scale = 1;
   1504   unsigned Opcode = Op0->getOpcode();
   1505   if (Opcode == ARM::LDRi12)
   1506     NewOpc = ARM::LDRD;
   1507   else if (Opcode == ARM::STRi12)
   1508     NewOpc = ARM::STRD;
   1509   else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
   1510     NewOpc = ARM::t2LDRDi8;
   1511     Scale = 4;
   1512     isT2 = true;
   1513   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
   1514     NewOpc = ARM::t2STRDi8;
   1515     Scale = 4;
   1516     isT2 = true;
   1517   } else
   1518     return false;
   1519 
   1520   // Make sure the base address satisfies i64 ld / st alignment requirement.
   1521   if (!Op0->hasOneMemOperand() ||
   1522       !(*Op0->memoperands_begin())->getValue() ||
   1523       (*Op0->memoperands_begin())->isVolatile())
   1524     return false;
   1525 
   1526   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
   1527   const Function *Func = MF->getFunction();
   1528   unsigned ReqAlign = STI->hasV6Ops()
   1529     ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
   1530     : 8;  // Pre-v6 need 8-byte align
   1531   if (Align < ReqAlign)
   1532     return false;
   1533 
   1534   // Then make sure the immediate offset fits.
   1535   int OffImm = getMemoryOpOffset(Op0);
   1536   if (isT2) {
   1537     int Limit = (1 << 8) * Scale;
   1538     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
   1539       return false;
   1540     Offset = OffImm;
   1541   } else {
   1542     ARM_AM::AddrOpc AddSub = ARM_AM::add;
   1543     if (OffImm < 0) {
   1544       AddSub = ARM_AM::sub;
   1545       OffImm = - OffImm;
   1546     }
   1547     int Limit = (1 << 8) * Scale;
   1548     if (OffImm >= Limit || (OffImm & (Scale-1)))
   1549       return false;
   1550     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
   1551   }
   1552   EvenReg = Op0->getOperand(0).getReg();
   1553   OddReg  = Op1->getOperand(0).getReg();
   1554   if (EvenReg == OddReg)
   1555     return false;
   1556   BaseReg = Op0->getOperand(1).getReg();
   1557   Pred = llvm::getInstrPredicate(Op0, PredReg);
   1558   dl = Op0->getDebugLoc();
   1559   return true;
   1560 }
   1561 
   1562 namespace {
   1563   struct OffsetCompare {
   1564     bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
   1565       int LOffset = getMemoryOpOffset(LHS);
   1566       int ROffset = getMemoryOpOffset(RHS);
   1567       assert(LHS == RHS || LOffset != ROffset);
   1568       return LOffset > ROffset;
   1569     }
   1570   };
   1571 }
   1572 
/// RescheduleOps - Try to move a run of loads (or stores) off the same base
/// register next to each other so a later pass can merge them into LDM/STM
/// or LDRD/STRD. Ops holds candidate instructions with a common base and
/// direction; MI2LocMap maps each instruction to its original position in
/// the basic block. Returns true if any instruction was moved or rewritten.
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                 SmallVector<MachineInstr*, 4> &Ops,
                                 unsigned Base, bool isLd,
                                 DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(), OffsetCompare());

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    // Track the earliest / latest original positions of the run being built,
    // plus the last-seen opcode/offset/size to enforce a contiguous,
    // same-opcode sequence.
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = 0;
    MachineInstr *LastOp = 0;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    // Ops is sorted by descending offset, so iterating from the back visits
    // ascending offsets.
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned Opcode = Op->getOpcode();
      if (LastOpcode && Opcode != LastOpcode)
        break;

      // Require strictly consecutive addresses: each op must start exactly
      // where the previous one ended (no gaps, no overlaps).
      int Offset = getMemoryOpOffset(Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = Opcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      // Collect the run's instructions and transfer registers so the safety
      // check can account for them when scanning intervening instructions.
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        // Abandon this whole run; the candidates were popped as a group.
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        // Loads sink to the first op's position; stores hoist to the last's.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end()
               && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned EvenReg = 0, OddReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             EvenReg, OddReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
          // Constrain both vregs to a class the paired instruction accepts.
          MRI->constrainRegClass(EvenReg, TRC);
          MRI->constrainRegClass(OddReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg, RegState::Define)
              .addReg(OddReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(EvenReg)
              .addReg(OddReg)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always by reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            ++NumSTRDFormed;
          }
          // The pair instruction replaces both originals.
          MBB->erase(Op0);
          MBB->erase(Op1);

          // Add register allocation hints to form register pairs.
          MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
          MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
        } else {
          // No pairing: just splice the run's instructions to be adjacent.
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
   1720 
   1721 bool
   1722 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
   1723   bool RetVal = false;
   1724 
   1725   DenseMap<MachineInstr*, unsigned> MI2LocMap;
   1726   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
   1727   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
   1728   SmallVector<unsigned, 4> LdBases;
   1729   SmallVector<unsigned, 4> StBases;
   1730 
   1731   unsigned Loc = 0;
   1732   MachineBasicBlock::iterator MBBI = MBB->begin();
   1733   MachineBasicBlock::iterator E = MBB->end();
   1734   while (MBBI != E) {
   1735     for (; MBBI != E; ++MBBI) {
   1736       MachineInstr *MI = MBBI;
   1737       const MCInstrDesc &MCID = MI->getDesc();
   1738       if (MCID.isCall() || MCID.isTerminator()) {
   1739         // Stop at barriers.
   1740         ++MBBI;
   1741         break;
   1742       }
   1743 
   1744       if (!MI->isDebugValue())
   1745         MI2LocMap[MI] = ++Loc;
   1746 
   1747       if (!isMemoryOp(MI))
   1748         continue;
   1749       unsigned PredReg = 0;
   1750       if (llvm::getInstrPredicate(MI, PredReg) != ARMCC::AL)
   1751         continue;
   1752 
   1753       int Opc = MI->getOpcode();
   1754       bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
   1755       unsigned Base = MI->getOperand(1).getReg();
   1756       int Offset = getMemoryOpOffset(MI);
   1757 
   1758       bool StopHere = false;
   1759       if (isLd) {
   1760         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
   1761           Base2LdsMap.find(Base);
   1762         if (BI != Base2LdsMap.end()) {
   1763           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
   1764             if (Offset == getMemoryOpOffset(BI->second[i])) {
   1765               StopHere = true;
   1766               break;
   1767             }
   1768           }
   1769           if (!StopHere)
   1770             BI->second.push_back(MI);
   1771         } else {
   1772           SmallVector<MachineInstr*, 4> MIs;
   1773           MIs.push_back(MI);
   1774           Base2LdsMap[Base] = MIs;
   1775           LdBases.push_back(Base);
   1776         }
   1777       } else {
   1778         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
   1779           Base2StsMap.find(Base);
   1780         if (BI != Base2StsMap.end()) {
   1781           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
   1782             if (Offset == getMemoryOpOffset(BI->second[i])) {
   1783               StopHere = true;
   1784               break;
   1785             }
   1786           }
   1787           if (!StopHere)
   1788             BI->second.push_back(MI);
   1789         } else {
   1790           SmallVector<MachineInstr*, 4> MIs;
   1791           MIs.push_back(MI);
   1792           Base2StsMap[Base] = MIs;
   1793           StBases.push_back(Base);
   1794         }
   1795       }
   1796 
   1797       if (StopHere) {
   1798         // Found a duplicate (a base+offset combination that's seen earlier).
   1799         // Backtrack.
   1800         --Loc;
   1801         break;
   1802       }
   1803     }
   1804 
   1805     // Re-schedule loads.
   1806     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
   1807       unsigned Base = LdBases[i];
   1808       SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
   1809       if (Lds.size() > 1)
   1810         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
   1811     }
   1812 
   1813     // Re-schedule stores.
   1814     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
   1815       unsigned Base = StBases[i];
   1816       SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
   1817       if (Sts.size() > 1)
   1818         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
   1819     }
   1820 
   1821     if (MBBI != E) {
   1822       Base2LdsMap.clear();
   1823       Base2StsMap.clear();
   1824       LdBases.clear();
   1825       StBases.clear();
   1826     }
   1827   }
   1828 
   1829   return RetVal;
   1830 }
   1831 
   1832 
   1833 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
   1834 /// optimization pass.
   1835 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
   1836   if (PreAlloc)
   1837     return new ARMPreAllocLoadStoreOpt();
   1838   return new ARMLoadStoreOpt();
   1839 }
   1840