Home | History | Annotate | Download | only in ARM
      1 //===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains a pass that performs load / store related peephole
     11 // optimizations. This pass should be run after register allocation.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "arm-ldst-opt"
     16 #include "ARM.h"
     17 #include "ARMBaseInstrInfo.h"
     18 #include "ARMBaseRegisterInfo.h"
     19 #include "ARMMachineFunctionInfo.h"
     20 #include "MCTargetDesc/ARMAddressingModes.h"
     21 #include "llvm/ADT/DenseMap.h"
     22 #include "llvm/ADT/STLExtras.h"
     23 #include "llvm/ADT/SmallPtrSet.h"
     24 #include "llvm/ADT/SmallSet.h"
     25 #include "llvm/ADT/SmallVector.h"
     26 #include "llvm/ADT/Statistic.h"
     27 #include "llvm/CodeGen/MachineBasicBlock.h"
     28 #include "llvm/CodeGen/MachineFunctionPass.h"
     29 #include "llvm/CodeGen/MachineInstr.h"
     30 #include "llvm/CodeGen/MachineInstrBuilder.h"
     31 #include "llvm/CodeGen/MachineRegisterInfo.h"
     32 #include "llvm/CodeGen/RegisterScavenging.h"
     33 #include "llvm/CodeGen/SelectionDAGNodes.h"
     34 #include "llvm/IR/DataLayout.h"
     35 #include "llvm/IR/DerivedTypes.h"
     36 #include "llvm/IR/Function.h"
     37 #include "llvm/Support/Debug.h"
     38 #include "llvm/Support/ErrorHandling.h"
     39 #include "llvm/Support/raw_ostream.h"
     40 #include "llvm/Target/TargetInstrInfo.h"
     41 #include "llvm/Target/TargetMachine.h"
     42 #include "llvm/Target/TargetRegisterInfo.h"
     43 using namespace llvm;
     44 
     45 STATISTIC(NumLDMGened , "Number of ldm instructions generated");
     46 STATISTIC(NumSTMGened , "Number of stm instructions generated");
     47 STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
     48 STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
     49 STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
     50 STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
     51 STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
     52 STATISTIC(NumLDRD2LDM,  "Number of ldrd instructions turned back into ldm");
     53 STATISTIC(NumSTRD2STM,  "Number of strd instructions turned back into stm");
     54 STATISTIC(NumLDRD2LDR,  "Number of ldrd instructions turned back into ldr's");
     55 STATISTIC(NumSTRD2STR,  "Number of strd instructions turned back into str's");
     56 
     57 /// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
     58 /// load / store instructions to form ldm / stm instructions.
     59 
     60 namespace {
     61   struct ARMLoadStoreOpt : public MachineFunctionPass {
     62     static char ID;
     63     ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
     64 
     65     const TargetInstrInfo *TII;
     66     const TargetRegisterInfo *TRI;
     67     const ARMSubtarget *STI;
     68     ARMFunctionInfo *AFI;
     69     RegScavenger *RS;
     70     bool isThumb2;
     71 
     72     virtual bool runOnMachineFunction(MachineFunction &Fn);
     73 
     74     virtual const char *getPassName() const {
     75       return "ARM load / store optimization pass";
     76     }
     77 
     78   private:
     79     struct MemOpQueueEntry {
     80       int Offset;
     81       unsigned Reg;
     82       bool isKill;
     83       unsigned Position;
     84       MachineBasicBlock::iterator MBBI;
     85       bool Merged;
     86       MemOpQueueEntry(int o, unsigned r, bool k, unsigned p,
     87                       MachineBasicBlock::iterator i)
     88         : Offset(o), Reg(r), isKill(k), Position(p), MBBI(i), Merged(false) {}
     89     };
     90     typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;
     91     typedef MemOpQueue::iterator MemOpQueueIter;
     92 
     93     bool MergeOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
     94                   int Offset, unsigned Base, bool BaseKill, int Opcode,
     95                   ARMCC::CondCodes Pred, unsigned PredReg, unsigned Scratch,
     96                   DebugLoc dl,
     97                   ArrayRef<std::pair<unsigned, bool> > Regs,
     98                   ArrayRef<unsigned> ImpDefs);
     99     void MergeOpsUpdate(MachineBasicBlock &MBB,
    100                         MemOpQueue &MemOps,
    101                         unsigned memOpsBegin,
    102                         unsigned memOpsEnd,
    103                         unsigned insertAfter,
    104                         int Offset,
    105                         unsigned Base,
    106                         bool BaseKill,
    107                         int Opcode,
    108                         ARMCC::CondCodes Pred,
    109                         unsigned PredReg,
    110                         unsigned Scratch,
    111                         DebugLoc dl,
    112                         SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    113     void MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex, unsigned Base,
    114                       int Opcode, unsigned Size,
    115                       ARMCC::CondCodes Pred, unsigned PredReg,
    116                       unsigned Scratch, MemOpQueue &MemOps,
    117                       SmallVectorImpl<MachineBasicBlock::iterator> &Merges);
    118 
    119     void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps);
    120     bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
    121                              MachineBasicBlock::iterator &MBBI);
    122     bool MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
    123                                   MachineBasicBlock::iterator MBBI,
    124                                   const TargetInstrInfo *TII,
    125                                   bool &Advance,
    126                                   MachineBasicBlock::iterator &I);
    127     bool MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
    128                                    MachineBasicBlock::iterator MBBI,
    129                                    bool &Advance,
    130                                    MachineBasicBlock::iterator &I);
    131     bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
    132     bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
    133   };
    134   char ARMLoadStoreOpt::ID = 0;
    135 }
    136 
    137 static int getLoadStoreMultipleOpcode(int Opcode, ARM_AM::AMSubMode Mode) {
    138   switch (Opcode) {
    139   default: llvm_unreachable("Unhandled opcode!");
    140   case ARM::LDRi12:
    141     ++NumLDMGened;
    142     switch (Mode) {
    143     default: llvm_unreachable("Unhandled submode!");
    144     case ARM_AM::ia: return ARM::LDMIA;
    145     case ARM_AM::da: return ARM::LDMDA;
    146     case ARM_AM::db: return ARM::LDMDB;
    147     case ARM_AM::ib: return ARM::LDMIB;
    148     }
    149   case ARM::STRi12:
    150     ++NumSTMGened;
    151     switch (Mode) {
    152     default: llvm_unreachable("Unhandled submode!");
    153     case ARM_AM::ia: return ARM::STMIA;
    154     case ARM_AM::da: return ARM::STMDA;
    155     case ARM_AM::db: return ARM::STMDB;
    156     case ARM_AM::ib: return ARM::STMIB;
    157     }
    158   case ARM::t2LDRi8:
    159   case ARM::t2LDRi12:
    160     ++NumLDMGened;
    161     switch (Mode) {
    162     default: llvm_unreachable("Unhandled submode!");
    163     case ARM_AM::ia: return ARM::t2LDMIA;
    164     case ARM_AM::db: return ARM::t2LDMDB;
    165     }
    166   case ARM::t2STRi8:
    167   case ARM::t2STRi12:
    168     ++NumSTMGened;
    169     switch (Mode) {
    170     default: llvm_unreachable("Unhandled submode!");
    171     case ARM_AM::ia: return ARM::t2STMIA;
    172     case ARM_AM::db: return ARM::t2STMDB;
    173     }
    174   case ARM::VLDRS:
    175     ++NumVLDMGened;
    176     switch (Mode) {
    177     default: llvm_unreachable("Unhandled submode!");
    178     case ARM_AM::ia: return ARM::VLDMSIA;
    179     case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    180     }
    181   case ARM::VSTRS:
    182     ++NumVSTMGened;
    183     switch (Mode) {
    184     default: llvm_unreachable("Unhandled submode!");
    185     case ARM_AM::ia: return ARM::VSTMSIA;
    186     case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    187     }
    188   case ARM::VLDRD:
    189     ++NumVLDMGened;
    190     switch (Mode) {
    191     default: llvm_unreachable("Unhandled submode!");
    192     case ARM_AM::ia: return ARM::VLDMDIA;
    193     case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    194     }
    195   case ARM::VSTRD:
    196     ++NumVSTMGened;
    197     switch (Mode) {
    198     default: llvm_unreachable("Unhandled submode!");
    199     case ARM_AM::ia: return ARM::VSTMDIA;
    200     case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    201     }
    202   }
    203 }
    204 
    205 namespace llvm {
    206   namespace ARM_AM {
    207 
    208 AMSubMode getLoadStoreMultipleSubMode(int Opcode) {
    209   switch (Opcode) {
    210   default: llvm_unreachable("Unhandled opcode!");
    211   case ARM::LDMIA_RET:
    212   case ARM::LDMIA:
    213   case ARM::LDMIA_UPD:
    214   case ARM::STMIA:
    215   case ARM::STMIA_UPD:
    216   case ARM::t2LDMIA_RET:
    217   case ARM::t2LDMIA:
    218   case ARM::t2LDMIA_UPD:
    219   case ARM::t2STMIA:
    220   case ARM::t2STMIA_UPD:
    221   case ARM::VLDMSIA:
    222   case ARM::VLDMSIA_UPD:
    223   case ARM::VSTMSIA:
    224   case ARM::VSTMSIA_UPD:
    225   case ARM::VLDMDIA:
    226   case ARM::VLDMDIA_UPD:
    227   case ARM::VSTMDIA:
    228   case ARM::VSTMDIA_UPD:
    229     return ARM_AM::ia;
    230 
    231   case ARM::LDMDA:
    232   case ARM::LDMDA_UPD:
    233   case ARM::STMDA:
    234   case ARM::STMDA_UPD:
    235     return ARM_AM::da;
    236 
    237   case ARM::LDMDB:
    238   case ARM::LDMDB_UPD:
    239   case ARM::STMDB:
    240   case ARM::STMDB_UPD:
    241   case ARM::t2LDMDB:
    242   case ARM::t2LDMDB_UPD:
    243   case ARM::t2STMDB:
    244   case ARM::t2STMDB_UPD:
    245   case ARM::VLDMSDB_UPD:
    246   case ARM::VSTMSDB_UPD:
    247   case ARM::VLDMDDB_UPD:
    248   case ARM::VSTMDDB_UPD:
    249     return ARM_AM::db;
    250 
    251   case ARM::LDMIB:
    252   case ARM::LDMIB_UPD:
    253   case ARM::STMIB:
    254   case ARM::STMIB_UPD:
    255     return ARM_AM::ib;
    256   }
    257 }
    258 
    259   } // end namespace ARM_AM
    260 } // end namespace llvm
    261 
    262 static bool isT2i32Load(unsigned Opc) {
    263   return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
    264 }
    265 
    266 static bool isi32Load(unsigned Opc) {
    267   return Opc == ARM::LDRi12 || isT2i32Load(Opc);
    268 }
    269 
    270 static bool isT2i32Store(unsigned Opc) {
    271   return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
    272 }
    273 
    274 static bool isi32Store(unsigned Opc) {
    275   return Opc == ARM::STRi12 || isT2i32Store(Opc);
    276 }
    277 
    278 /// MergeOps - Create and insert a LDM or STM with Base as base register and
    279 /// registers in Regs as the register operands that would be loaded / stored.
    280 /// It returns true if the transformation is done.
    281 bool
    282 ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
    283                           MachineBasicBlock::iterator MBBI,
    284                           int Offset, unsigned Base, bool BaseKill,
    285                           int Opcode, ARMCC::CondCodes Pred,
    286                           unsigned PredReg, unsigned Scratch, DebugLoc dl,
    287                           ArrayRef<std::pair<unsigned, bool> > Regs,
    288                           ArrayRef<unsigned> ImpDefs) {
    289   // Only a single register to load / store. Don't bother.
    290   unsigned NumRegs = Regs.size();
    291   if (NumRegs <= 1)
    292     return false;
    293 
    294   ARM_AM::AMSubMode Mode = ARM_AM::ia;
    295   // VFP and Thumb2 do not support IB or DA modes.
    296   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
    297   bool haveIBAndDA = isNotVFP && !isThumb2;
    298   if (Offset == 4 && haveIBAndDA)
    299     Mode = ARM_AM::ib;
    300   else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA)
    301     Mode = ARM_AM::da;
    302   else if (Offset == -4 * (int)NumRegs && isNotVFP)
    303     // VLDM/VSTM do not support DB mode without also updating the base reg.
    304     Mode = ARM_AM::db;
    305   else if (Offset != 0) {
    306     // Check if this is a supported opcode before we insert instructions to
    307     // calculate a new base register.
    308     if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return false;
    309 
    310     // If starting offset isn't zero, insert a MI to materialize a new base.
    311     // But only do so if it is cost effective, i.e. merging more than two
    312     // loads / stores.
    313     if (NumRegs <= 2)
    314       return false;
    315 
    316     unsigned NewBase;
    317     if (isi32Load(Opcode))
    318       // If it is a load, then just use one of the destination register to
    319       // use as the new base.
    320       NewBase = Regs[NumRegs-1].first;
    321     else {
    322       // Use the scratch register to use as a new base.
    323       NewBase = Scratch;
    324       if (NewBase == 0)
    325         return false;
    326     }
    327     int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
    328     if (Offset < 0) {
    329       BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
    330       Offset = - Offset;
    331     }
    332     int ImmedOffset = isThumb2
    333       ? ARM_AM::getT2SOImmVal(Offset) : ARM_AM::getSOImmVal(Offset);
    334     if (ImmedOffset == -1)
    335       // FIXME: Try t2ADDri12 or t2SUBri12?
    336       return false;  // Probably not worth it then.
    337 
    338     BuildMI(MBB, MBBI, dl, TII->get(BaseOpc), NewBase)
    339       .addReg(Base, getKillRegState(BaseKill)).addImm(Offset)
    340       .addImm(Pred).addReg(PredReg).addReg(0);
    341     Base = NewBase;
    342     BaseKill = true;  // New base is always killed right its use.
    343   }
    344 
    345   bool isDef = (isi32Load(Opcode) || Opcode == ARM::VLDRS ||
    346                 Opcode == ARM::VLDRD);
    347   Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
    348   if (!Opcode) return false;
    349   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode))
    350     .addReg(Base, getKillRegState(BaseKill))
    351     .addImm(Pred).addReg(PredReg);
    352   for (unsigned i = 0; i != NumRegs; ++i)
    353     MIB = MIB.addReg(Regs[i].first, getDefRegState(isDef)
    354                      | getKillRegState(Regs[i].second));
    355 
    356   // Add implicit defs for super-registers.
    357   for (unsigned i = 0, e = ImpDefs.size(); i != e; ++i)
    358     MIB.addReg(ImpDefs[i], RegState::ImplicitDefine);
    359 
    360   return true;
    361 }
    362 
    363 // MergeOpsUpdate - call MergeOps and update MemOps and merges accordingly on
    364 // success.
    365 void ARMLoadStoreOpt::MergeOpsUpdate(MachineBasicBlock &MBB,
    366                                      MemOpQueue &memOps,
    367                                      unsigned memOpsBegin, unsigned memOpsEnd,
    368                                      unsigned insertAfter, int Offset,
    369                                      unsigned Base, bool BaseKill,
    370                                      int Opcode,
    371                                      ARMCC::CondCodes Pred, unsigned PredReg,
    372                                      unsigned Scratch,
    373                                      DebugLoc dl,
    374                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
    375   // First calculate which of the registers should be killed by the merged
    376   // instruction.
    377   const unsigned insertPos = memOps[insertAfter].Position;
    378   SmallSet<unsigned, 4> KilledRegs;
    379   DenseMap<unsigned, unsigned> Killer;
    380   for (unsigned i = 0, e = memOps.size(); i != e; ++i) {
    381     if (i == memOpsBegin) {
    382       i = memOpsEnd;
    383       if (i == e)
    384         break;
    385     }
    386     if (memOps[i].Position < insertPos && memOps[i].isKill) {
    387       unsigned Reg = memOps[i].Reg;
    388       KilledRegs.insert(Reg);
    389       Killer[Reg] = i;
    390     }
    391   }
    392 
    393   SmallVector<std::pair<unsigned, bool>, 8> Regs;
    394   SmallVector<unsigned, 8> ImpDefs;
    395   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    396     unsigned Reg = memOps[i].Reg;
    397     // If we are inserting the merged operation after an operation that
    398     // uses the same register, make sure to transfer any kill flag.
    399     bool isKill = memOps[i].isKill || KilledRegs.count(Reg);
    400     Regs.push_back(std::make_pair(Reg, isKill));
    401 
    402     // Collect any implicit defs of super-registers. They must be preserved.
    403     for (MIOperands MO(memOps[i].MBBI); MO.isValid(); ++MO) {
    404       if (!MO->isReg() || !MO->isDef() || !MO->isImplicit() || MO->isDead())
    405         continue;
    406       unsigned DefReg = MO->getReg();
    407       if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) == ImpDefs.end())
    408         ImpDefs.push_back(DefReg);
    409     }
    410   }
    411 
    412   // Try to do the merge.
    413   MachineBasicBlock::iterator Loc = memOps[insertAfter].MBBI;
    414   ++Loc;
    415   if (!MergeOps(MBB, Loc, Offset, Base, BaseKill, Opcode,
    416                 Pred, PredReg, Scratch, dl, Regs, ImpDefs))
    417     return;
    418 
    419   // Merge succeeded, update records.
    420   Merges.push_back(prior(Loc));
    421   for (unsigned i = memOpsBegin; i < memOpsEnd; ++i) {
    422     // Remove kill flags from any memops that come before insertPos.
    423     if (Regs[i-memOpsBegin].second) {
    424       unsigned Reg = Regs[i-memOpsBegin].first;
    425       if (KilledRegs.count(Reg)) {
    426         unsigned j = Killer[Reg];
    427         int Idx = memOps[j].MBBI->findRegisterUseOperandIdx(Reg, true);
    428         assert(Idx >= 0 && "Cannot find killing operand");
    429         memOps[j].MBBI->getOperand(Idx).setIsKill(false);
    430         memOps[j].isKill = false;
    431       }
    432       memOps[i].isKill = true;
    433     }
    434     MBB.erase(memOps[i].MBBI);
    435     // Update this memop to refer to the merged instruction.
    436     // We may need to move kill flags again.
    437     memOps[i].Merged = true;
    438     memOps[i].MBBI = Merges.back();
    439     memOps[i].Position = insertPos;
    440   }
    441 }
    442 
    443 /// MergeLDR_STR - Merge a number of load / store instructions into one or more
    444 /// load / store multiple instructions.
    445 void
    446 ARMLoadStoreOpt::MergeLDR_STR(MachineBasicBlock &MBB, unsigned SIndex,
    447                          unsigned Base, int Opcode, unsigned Size,
    448                          ARMCC::CondCodes Pred, unsigned PredReg,
    449                          unsigned Scratch, MemOpQueue &MemOps,
    450                          SmallVectorImpl<MachineBasicBlock::iterator> &Merges) {
    451   bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
    452   int Offset = MemOps[SIndex].Offset;
    453   int SOffset = Offset;
    454   unsigned insertAfter = SIndex;
    455   MachineBasicBlock::iterator Loc = MemOps[SIndex].MBBI;
    456   DebugLoc dl = Loc->getDebugLoc();
    457   const MachineOperand &PMO = Loc->getOperand(0);
    458   unsigned PReg = PMO.getReg();
    459   unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    460   unsigned Count = 1;
    461   unsigned Limit = ~0U;
    462 
    463   // vldm / vstm limit are 32 for S variants, 16 for D variants.
    464 
    465   switch (Opcode) {
    466   default: break;
    467   case ARM::VSTRS:
    468     Limit = 32;
    469     break;
    470   case ARM::VSTRD:
    471     Limit = 16;
    472     break;
    473   case ARM::VLDRD:
    474     Limit = 16;
    475     break;
    476   case ARM::VLDRS:
    477     Limit = 32;
    478     break;
    479   }
    480 
    481   for (unsigned i = SIndex+1, e = MemOps.size(); i != e; ++i) {
    482     int NewOffset = MemOps[i].Offset;
    483     const MachineOperand &MO = MemOps[i].MBBI->getOperand(0);
    484     unsigned Reg = MO.getReg();
    485     unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
    486     // Register numbers must be in ascending order. For VFP / NEON load and
    487     // store multiples, the registers must also be consecutive and within the
    488     // limit on the number of registers per instruction.
    489     if (Reg != ARM::SP &&
    490         NewOffset == Offset + (int)Size &&
    491         ((isNotVFP && RegNum > PRegNum) ||
    492          ((Count < Limit) && RegNum == PRegNum+1))) {
    493       Offset += Size;
    494       PRegNum = RegNum;
    495       ++Count;
    496     } else {
    497       // Can't merge this in. Try merge the earlier ones first.
    498       MergeOpsUpdate(MBB, MemOps, SIndex, i, insertAfter, SOffset,
    499                      Base, false, Opcode, Pred, PredReg, Scratch, dl, Merges);
    500       MergeLDR_STR(MBB, i, Base, Opcode, Size, Pred, PredReg, Scratch,
    501                    MemOps, Merges);
    502       return;
    503     }
    504 
    505     if (MemOps[i].Position > MemOps[insertAfter].Position)
    506       insertAfter = i;
    507   }
    508 
    509   bool BaseKill = Loc->findRegisterUseOperandIdx(Base, true) != -1;
    510   MergeOpsUpdate(MBB, MemOps, SIndex, MemOps.size(), insertAfter, SOffset,
    511                  Base, BaseKill, Opcode, Pred, PredReg, Scratch, dl, Merges);
    512   return;
    513 }
    514 
    515 static bool definesCPSR(MachineInstr *MI) {
    516   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    517     const MachineOperand &MO = MI->getOperand(i);
    518     if (!MO.isReg())
    519       continue;
    520     if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
    521       // If the instruction has live CPSR def, then it's not safe to fold it
    522       // into load / store.
    523       return true;
    524   }
    525 
    526   return false;
    527 }
    528 
    529 static bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
    530                                 unsigned Bytes, unsigned Limit,
    531                                 ARMCC::CondCodes Pred, unsigned PredReg) {
    532   unsigned MyPredReg = 0;
    533   if (!MI)
    534     return false;
    535 
    536   bool CheckCPSRDef = false;
    537   switch (MI->getOpcode()) {
    538   default: return false;
    539   case ARM::t2SUBri:
    540   case ARM::SUBri:
    541     CheckCPSRDef = true;
    542   // fallthrough
    543   case ARM::tSUBspi:
    544     break;
    545   }
    546 
    547   // Make sure the offset fits in 8 bits.
    548   if (Bytes == 0 || (Limit && Bytes >= Limit))
    549     return false;
    550 
    551   unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME
    552   if (!(MI->getOperand(0).getReg() == Base &&
    553         MI->getOperand(1).getReg() == Base &&
    554         (MI->getOperand(2).getImm()*Scale) == Bytes &&
    555         getInstrPredicate(MI, MyPredReg) == Pred &&
    556         MyPredReg == PredReg))
    557     return false;
    558 
    559   return CheckCPSRDef ? !definesCPSR(MI) : true;
    560 }
    561 
    562 static bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
    563                                 unsigned Bytes, unsigned Limit,
    564                                 ARMCC::CondCodes Pred, unsigned PredReg) {
    565   unsigned MyPredReg = 0;
    566   if (!MI)
    567     return false;
    568 
    569   bool CheckCPSRDef = false;
    570   switch (MI->getOpcode()) {
    571   default: return false;
    572   case ARM::t2ADDri:
    573   case ARM::ADDri:
    574     CheckCPSRDef = true;
    575   // fallthrough
    576   case ARM::tADDspi:
    577     break;
    578   }
    579 
    580   if (Bytes == 0 || (Limit && Bytes >= Limit))
    581     // Make sure the offset fits in 8 bits.
    582     return false;
    583 
    584   unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME
    585   if (!(MI->getOperand(0).getReg() == Base &&
    586         MI->getOperand(1).getReg() == Base &&
    587         (MI->getOperand(2).getImm()*Scale) == Bytes &&
    588         getInstrPredicate(MI, MyPredReg) == Pred &&
    589         MyPredReg == PredReg))
    590     return false;
    591 
    592   return CheckCPSRDef ? !definesCPSR(MI) : true;
    593 }
    594 
    595 static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
    596   switch (MI->getOpcode()) {
    597   default: return 0;
    598   case ARM::LDRi12:
    599   case ARM::STRi12:
    600   case ARM::t2LDRi8:
    601   case ARM::t2LDRi12:
    602   case ARM::t2STRi8:
    603   case ARM::t2STRi12:
    604   case ARM::VLDRS:
    605   case ARM::VSTRS:
    606     return 4;
    607   case ARM::VLDRD:
    608   case ARM::VSTRD:
    609     return 8;
    610   case ARM::LDMIA:
    611   case ARM::LDMDA:
    612   case ARM::LDMDB:
    613   case ARM::LDMIB:
    614   case ARM::STMIA:
    615   case ARM::STMDA:
    616   case ARM::STMDB:
    617   case ARM::STMIB:
    618   case ARM::t2LDMIA:
    619   case ARM::t2LDMDB:
    620   case ARM::t2STMIA:
    621   case ARM::t2STMDB:
    622   case ARM::VLDMSIA:
    623   case ARM::VSTMSIA:
    624     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
    625   case ARM::VLDMDIA:
    626   case ARM::VSTMDIA:
    627     return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
    628   }
    629 }
    630 
    631 static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
    632                                             ARM_AM::AMSubMode Mode) {
    633   switch (Opc) {
    634   default: llvm_unreachable("Unhandled opcode!");
    635   case ARM::LDMIA:
    636   case ARM::LDMDA:
    637   case ARM::LDMDB:
    638   case ARM::LDMIB:
    639     switch (Mode) {
    640     default: llvm_unreachable("Unhandled submode!");
    641     case ARM_AM::ia: return ARM::LDMIA_UPD;
    642     case ARM_AM::ib: return ARM::LDMIB_UPD;
    643     case ARM_AM::da: return ARM::LDMDA_UPD;
    644     case ARM_AM::db: return ARM::LDMDB_UPD;
    645     }
    646   case ARM::STMIA:
    647   case ARM::STMDA:
    648   case ARM::STMDB:
    649   case ARM::STMIB:
    650     switch (Mode) {
    651     default: llvm_unreachable("Unhandled submode!");
    652     case ARM_AM::ia: return ARM::STMIA_UPD;
    653     case ARM_AM::ib: return ARM::STMIB_UPD;
    654     case ARM_AM::da: return ARM::STMDA_UPD;
    655     case ARM_AM::db: return ARM::STMDB_UPD;
    656     }
    657   case ARM::t2LDMIA:
    658   case ARM::t2LDMDB:
    659     switch (Mode) {
    660     default: llvm_unreachable("Unhandled submode!");
    661     case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    662     case ARM_AM::db: return ARM::t2LDMDB_UPD;
    663     }
    664   case ARM::t2STMIA:
    665   case ARM::t2STMDB:
    666     switch (Mode) {
    667     default: llvm_unreachable("Unhandled submode!");
    668     case ARM_AM::ia: return ARM::t2STMIA_UPD;
    669     case ARM_AM::db: return ARM::t2STMDB_UPD;
    670     }
    671   case ARM::VLDMSIA:
    672     switch (Mode) {
    673     default: llvm_unreachable("Unhandled submode!");
    674     case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    675     case ARM_AM::db: return ARM::VLDMSDB_UPD;
    676     }
    677   case ARM::VLDMDIA:
    678     switch (Mode) {
    679     default: llvm_unreachable("Unhandled submode!");
    680     case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    681     case ARM_AM::db: return ARM::VLDMDDB_UPD;
    682     }
    683   case ARM::VSTMSIA:
    684     switch (Mode) {
    685     default: llvm_unreachable("Unhandled submode!");
    686     case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    687     case ARM_AM::db: return ARM::VSTMSDB_UPD;
    688     }
    689   case ARM::VSTMDIA:
    690     switch (Mode) {
    691     default: llvm_unreachable("Unhandled submode!");
    692     case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    693     case ARM_AM::db: return ARM::VSTMDDB_UPD;
    694     }
    695   }
    696 }
    697 
    698 /// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
    699 /// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
    700 ///
    701 /// stmia rn, <ra, rb, rc>
    702 /// rn := rn + 4 * 3;
    703 /// =>
    704 /// stmia rn!, <ra, rb, rc>
    705 ///
    706 /// rn := rn - 4 * 3;
    707 /// ldmia rn, <ra, rb, rc>
    708 /// =>
    709 /// ldmdb rn!, <ra, rb, rc>
    710 bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
    711                                                MachineBasicBlock::iterator MBBI,
    712                                                bool &Advance,
    713                                                MachineBasicBlock::iterator &I) {
    714   MachineInstr *MI = MBBI;
    715   unsigned Base = MI->getOperand(0).getReg();
    716   bool BaseKill = MI->getOperand(0).isKill();
    717   unsigned Bytes = getLSMultipleTransferSize(MI);
    718   unsigned PredReg = 0;
    719   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
    720   int Opcode = MI->getOpcode();
    721   DebugLoc dl = MI->getDebugLoc();
    722 
    723   // Can't use an updating ld/st if the base register is also a dest
    724   // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
    725   for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
    726     if (MI->getOperand(i).getReg() == Base)
    727       return false;
    728 
    729   bool DoMerge = false;
    730   ARM_AM::AMSubMode Mode = ARM_AM::getLoadStoreMultipleSubMode(Opcode);
    731 
    732   // Try merging with the previous instruction.
    733   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
    734   if (MBBI != BeginMBBI) {
    735     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    736     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    737       --PrevMBBI;
    738     if (Mode == ARM_AM::ia &&
    739         isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    740       Mode = ARM_AM::db;
    741       DoMerge = true;
    742     } else if (Mode == ARM_AM::ib &&
    743                isMatchingDecrement(PrevMBBI, Base, Bytes, 0, Pred, PredReg)) {
    744       Mode = ARM_AM::da;
    745       DoMerge = true;
    746     }
    747     if (DoMerge)
    748       MBB.erase(PrevMBBI);
    749   }
    750 
    751   // Try merging with the next instruction.
    752   MachineBasicBlock::iterator EndMBBI = MBB.end();
    753   if (!DoMerge && MBBI != EndMBBI) {
    754     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    755     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    756       ++NextMBBI;
    757     if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
    758         isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    759       DoMerge = true;
    760     } else if ((Mode == ARM_AM::da || Mode == ARM_AM::db) &&
    761                isMatchingDecrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
    762       DoMerge = true;
    763     }
    764     if (DoMerge) {
    765       if (NextMBBI == I) {
    766         Advance = true;
    767         ++I;
    768       }
    769       MBB.erase(NextMBBI);
    770     }
    771   }
    772 
    773   if (!DoMerge)
    774     return false;
    775 
    776   unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
    777   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    778     .addReg(Base, getDefRegState(true)) // WB base register
    779     .addReg(Base, getKillRegState(BaseKill))
    780     .addImm(Pred).addReg(PredReg);
    781 
    782   // Transfer the rest of operands.
    783   for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
    784     MIB.addOperand(MI->getOperand(OpNum));
    785 
    786   // Transfer memoperands.
    787   MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
    788 
    789   MBB.erase(MBBI);
    790   return true;
    791 }
    792 
    793 static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
    794                                              ARM_AM::AddrOpc Mode) {
    795   switch (Opc) {
    796   case ARM::LDRi12:
    797     return ARM::LDR_PRE_IMM;
    798   case ARM::STRi12:
    799     return ARM::STR_PRE_IMM;
    800   case ARM::VLDRS:
    801     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    802   case ARM::VLDRD:
    803     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    804   case ARM::VSTRS:
    805     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    806   case ARM::VSTRD:
    807     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    808   case ARM::t2LDRi8:
    809   case ARM::t2LDRi12:
    810     return ARM::t2LDR_PRE;
    811   case ARM::t2STRi8:
    812   case ARM::t2STRi12:
    813     return ARM::t2STR_PRE;
    814   default: llvm_unreachable("Unhandled opcode!");
    815   }
    816 }
    817 
    818 static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
    819                                               ARM_AM::AddrOpc Mode) {
    820   switch (Opc) {
    821   case ARM::LDRi12:
    822     return ARM::LDR_POST_IMM;
    823   case ARM::STRi12:
    824     return ARM::STR_POST_IMM;
    825   case ARM::VLDRS:
    826     return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
    827   case ARM::VLDRD:
    828     return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
    829   case ARM::VSTRS:
    830     return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
    831   case ARM::VSTRD:
    832     return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
    833   case ARM::t2LDRi8:
    834   case ARM::t2LDRi12:
    835     return ARM::t2LDR_POST;
    836   case ARM::t2STRi8:
    837   case ARM::t2STRi12:
    838     return ARM::t2STR_POST;
    839   default: llvm_unreachable("Unhandled opcode!");
    840   }
    841 }
    842 
    843 /// MergeBaseUpdateLoadStore - Fold proceeding/trailing inc/dec of base
    844 /// register into the LDR/STR/FLD{D|S}/FST{D|S} op when possible:
    845 bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
    846                                                MachineBasicBlock::iterator MBBI,
    847                                                const TargetInstrInfo *TII,
    848                                                bool &Advance,
    849                                                MachineBasicBlock::iterator &I) {
    850   MachineInstr *MI = MBBI;
    851   unsigned Base = MI->getOperand(1).getReg();
    852   bool BaseKill = MI->getOperand(1).isKill();
    853   unsigned Bytes = getLSMultipleTransferSize(MI);
    854   int Opcode = MI->getOpcode();
    855   DebugLoc dl = MI->getDebugLoc();
    856   bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
    857                 Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
    858   bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
    859   if (isi32Load(Opcode) || isi32Store(Opcode))
    860     if (MI->getOperand(2).getImm() != 0)
    861       return false;
    862   if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    863     return false;
    864 
    865   bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
    866   // Can't do the merge if the destination register is the same as the would-be
    867   // writeback register.
    868   if (MI->getOperand(0).getReg() == Base)
    869     return false;
    870 
    871   unsigned PredReg = 0;
    872   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
    873   bool DoMerge = false;
    874   ARM_AM::AddrOpc AddSub = ARM_AM::add;
    875   unsigned NewOpc = 0;
    876   // AM2 - 12 bits, thumb2 - 8 bits.
    877   unsigned Limit = isAM5 ? 0 : (isAM2 ? 0x1000 : 0x100);
    878 
    879   // Try merging with the previous instruction.
    880   MachineBasicBlock::iterator BeginMBBI = MBB.begin();
    881   if (MBBI != BeginMBBI) {
    882     MachineBasicBlock::iterator PrevMBBI = prior(MBBI);
    883     while (PrevMBBI != BeginMBBI && PrevMBBI->isDebugValue())
    884       --PrevMBBI;
    885     if (isMatchingDecrement(PrevMBBI, Base, Bytes, Limit, Pred, PredReg)) {
    886       DoMerge = true;
    887       AddSub = ARM_AM::sub;
    888     } else if (!isAM5 &&
    889                isMatchingIncrement(PrevMBBI, Base, Bytes, Limit,Pred,PredReg)) {
    890       DoMerge = true;
    891     }
    892     if (DoMerge) {
    893       NewOpc = getPreIndexedLoadStoreOpcode(Opcode, AddSub);
    894       MBB.erase(PrevMBBI);
    895     }
    896   }
    897 
    898   // Try merging with the next instruction.
    899   MachineBasicBlock::iterator EndMBBI = MBB.end();
    900   if (!DoMerge && MBBI != EndMBBI) {
    901     MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
    902     while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    903       ++NextMBBI;
    904     if (!isAM5 &&
    905         isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
    906       DoMerge = true;
    907       AddSub = ARM_AM::sub;
    908     } else if (isMatchingIncrement(NextMBBI, Base, Bytes, Limit,Pred,PredReg)) {
    909       DoMerge = true;
    910     }
    911     if (DoMerge) {
    912       NewOpc = getPostIndexedLoadStoreOpcode(Opcode, AddSub);
    913       if (NextMBBI == I) {
    914         Advance = true;
    915         ++I;
    916       }
    917       MBB.erase(NextMBBI);
    918     }
    919   }
    920 
    921   if (!DoMerge)
    922     return false;
    923 
    924   if (isAM5) {
    925     // VLDM[SD}_UPD, VSTM[SD]_UPD
    926     // (There are no base-updating versions of VLDR/VSTR instructions, but the
    927     // updating load/store-multiple instructions can be used with only one
    928     // register.)
    929     MachineOperand &MO = MI->getOperand(0);
    930     BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
    931       .addReg(Base, getDefRegState(true)) // WB base register
    932       .addReg(Base, getKillRegState(isLd ? BaseKill : false))
    933       .addImm(Pred).addReg(PredReg)
    934       .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
    935                             getKillRegState(MO.isKill())));
    936   } else if (isLd) {
    937     if (isAM2) {
    938       // LDR_PRE, LDR_POST
    939       if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
    940         int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
    941         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
    942           .addReg(Base, RegState::Define)
    943           .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    944       } else {
    945         int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    946         BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
    947           .addReg(Base, RegState::Define)
    948           .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    949       }
    950     } else {
    951       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
    952       // t2LDR_PRE, t2LDR_POST
    953       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), MI->getOperand(0).getReg())
    954         .addReg(Base, RegState::Define)
    955         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    956     }
    957   } else {
    958     MachineOperand &MO = MI->getOperand(0);
    959     // FIXME: post-indexed stores use am2offset_imm, which still encodes
    960     // the vestigal zero-reg offset register. When that's fixed, this clause
    961     // can be removed entirely.
    962     if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
    963       int Offset = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
    964       // STR_PRE, STR_POST
    965       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
    966         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    967         .addReg(Base).addReg(0).addImm(Offset).addImm(Pred).addReg(PredReg);
    968     } else {
    969       int Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
    970       // t2STR_PRE, t2STR_POST
    971       BuildMI(MBB, MBBI, dl, TII->get(NewOpc), Base)
    972         .addReg(MO.getReg(), getKillRegState(MO.isKill()))
    973         .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    974     }
    975   }
    976   MBB.erase(MBBI);
    977 
    978   return true;
    979 }
    980 
    981 /// isMemoryOp - Returns true if instruction is a memory operation that this
    982 /// pass is capable of operating on.
    983 static bool isMemoryOp(const MachineInstr *MI) {
    984   // When no memory operands are present, conservatively assume unaligned,
    985   // volatile, unfoldable.
    986   if (!MI->hasOneMemOperand())
    987     return false;
    988 
    989   const MachineMemOperand *MMO = *MI->memoperands_begin();
    990 
    991   // Don't touch volatile memory accesses - we may be changing their order.
    992   if (MMO->isVolatile())
    993     return false;
    994 
    995   // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
    996   // not.
    997   if (MMO->getAlignment() < 4)
    998     return false;
    999 
   1000   // str <undef> could probably be eliminated entirely, but for now we just want
   1001   // to avoid making a mess of it.
   1002   // FIXME: Use str <undef> as a wildcard to enable better stm folding.
   1003   if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
   1004       MI->getOperand(0).isUndef())
   1005     return false;
   1006 
   1007   // Likewise don't mess with references to undefined addresses.
   1008   if (MI->getNumOperands() > 1 && MI->getOperand(1).isReg() &&
   1009       MI->getOperand(1).isUndef())
   1010     return false;
   1011 
   1012   int Opcode = MI->getOpcode();
   1013   switch (Opcode) {
   1014   default: break;
   1015   case ARM::VLDRS:
   1016   case ARM::VSTRS:
   1017     return MI->getOperand(1).isReg();
   1018   case ARM::VLDRD:
   1019   case ARM::VSTRD:
   1020     return MI->getOperand(1).isReg();
   1021   case ARM::LDRi12:
   1022   case ARM::STRi12:
   1023   case ARM::t2LDRi8:
   1024   case ARM::t2LDRi12:
   1025   case ARM::t2STRi8:
   1026   case ARM::t2STRi12:
   1027     return MI->getOperand(1).isReg();
   1028   }
   1029   return false;
   1030 }
   1031 
   1032 /// AdvanceRS - Advance register scavenger to just before the earliest memory
   1033 /// op that is being merged.
   1034 void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) {
   1035   MachineBasicBlock::iterator Loc = MemOps[0].MBBI;
   1036   unsigned Position = MemOps[0].Position;
   1037   for (unsigned i = 1, e = MemOps.size(); i != e; ++i) {
   1038     if (MemOps[i].Position < Position) {
   1039       Position = MemOps[i].Position;
   1040       Loc = MemOps[i].MBBI;
   1041     }
   1042   }
   1043 
   1044   if (Loc != MBB.begin())
   1045     RS->forward(prior(Loc));
   1046 }
   1047 
   1048 static int getMemoryOpOffset(const MachineInstr *MI) {
   1049   int Opcode = MI->getOpcode();
   1050   bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
   1051   unsigned NumOperands = MI->getDesc().getNumOperands();
   1052   unsigned OffField = MI->getOperand(NumOperands-3).getImm();
   1053 
   1054   if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
   1055       Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
   1056       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
   1057       Opcode == ARM::LDRi12   || Opcode == ARM::STRi12)
   1058     return OffField;
   1059 
   1060   int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
   1061     : ARM_AM::getAM5Offset(OffField) * 4;
   1062   if (isAM3) {
   1063     if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub)
   1064       Offset = -Offset;
   1065   } else {
   1066     if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
   1067       Offset = -Offset;
   1068   }
   1069   return Offset;
   1070 }
   1071 
   1072 static void InsertLDR_STR(MachineBasicBlock &MBB,
   1073                           MachineBasicBlock::iterator &MBBI,
   1074                           int Offset, bool isDef,
   1075                           DebugLoc dl, unsigned NewOpc,
   1076                           unsigned Reg, bool RegDeadKill, bool RegUndef,
   1077                           unsigned BaseReg, bool BaseKill, bool BaseUndef,
   1078                           bool OffKill, bool OffUndef,
   1079                           ARMCC::CondCodes Pred, unsigned PredReg,
   1080                           const TargetInstrInfo *TII, bool isT2) {
   1081   if (isDef) {
   1082     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
   1083                                       TII->get(NewOpc))
   1084       .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
   1085       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
   1086     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   1087   } else {
   1088     MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
   1089                                       TII->get(NewOpc))
   1090       .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
   1091       .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
   1092     MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   1093   }
   1094 }
   1095 
   1096 bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
   1097                                           MachineBasicBlock::iterator &MBBI) {
   1098   MachineInstr *MI = &*MBBI;
   1099   unsigned Opcode = MI->getOpcode();
   1100   if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
   1101       Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
   1102     const MachineOperand &BaseOp = MI->getOperand(2);
   1103     unsigned BaseReg = BaseOp.getReg();
   1104     unsigned EvenReg = MI->getOperand(0).getReg();
   1105     unsigned OddReg  = MI->getOperand(1).getReg();
   1106     unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
   1107     unsigned OddRegNum  = TRI->getDwarfRegNum(OddReg, false);
   1108     // ARM errata 602117: LDRD with base in list may result in incorrect base
   1109     // register when interrupted or faulted.
   1110     bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3();
   1111     if (!Errata602117 &&
   1112         ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum))
   1113       return false;
   1114 
   1115     MachineBasicBlock::iterator NewBBI = MBBI;
   1116     bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
   1117     bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
   1118     bool EvenDeadKill = isLd ?
   1119       MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
   1120     bool EvenUndef = MI->getOperand(0).isUndef();
   1121     bool OddDeadKill  = isLd ?
   1122       MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
   1123     bool OddUndef = MI->getOperand(1).isUndef();
   1124     bool BaseKill = BaseOp.isKill();
   1125     bool BaseUndef = BaseOp.isUndef();
   1126     bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
   1127     bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
   1128     int OffImm = getMemoryOpOffset(MI);
   1129     unsigned PredReg = 0;
   1130     ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
   1131 
   1132     if (OddRegNum > EvenRegNum && OffImm == 0) {
   1133       // Ascending register numbers and no offset. It's safe to change it to a
   1134       // ldm or stm.
   1135       unsigned NewOpc = (isLd)
   1136         ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
   1137         : (isT2 ? ARM::t2STMIA : ARM::STMIA);
   1138       if (isLd) {
   1139         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
   1140           .addReg(BaseReg, getKillRegState(BaseKill))
   1141           .addImm(Pred).addReg(PredReg)
   1142           .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
   1143           .addReg(OddReg,  getDefRegState(isLd) | getDeadRegState(OddDeadKill));
   1144         ++NumLDRD2LDM;
   1145       } else {
   1146         BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
   1147           .addReg(BaseReg, getKillRegState(BaseKill))
   1148           .addImm(Pred).addReg(PredReg)
   1149           .addReg(EvenReg,
   1150                   getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
   1151           .addReg(OddReg,
   1152                   getKillRegState(OddDeadKill)  | getUndefRegState(OddUndef));
   1153         ++NumSTRD2STM;
   1154       }
   1155       NewBBI = llvm::prior(MBBI);
   1156     } else {
   1157       // Split into two instructions.
   1158       unsigned NewOpc = (isLd)
   1159         ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
   1160         : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
   1161       // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
   1162       // so adjust and use t2LDRi12 here for that.
   1163       unsigned NewOpc2 = (isLd)
   1164         ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
   1165         : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
   1166       DebugLoc dl = MBBI->getDebugLoc();
   1167       // If this is a load and base register is killed, it may have been
   1168       // re-defed by the load, make sure the first load does not clobber it.
   1169       if (isLd &&
   1170           (BaseKill || OffKill) &&
   1171           (TRI->regsOverlap(EvenReg, BaseReg))) {
   1172         assert(!TRI->regsOverlap(OddReg, BaseReg));
   1173         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
   1174                       OddReg, OddDeadKill, false,
   1175                       BaseReg, false, BaseUndef, false, OffUndef,
   1176                       Pred, PredReg, TII, isT2);
   1177         NewBBI = llvm::prior(MBBI);
   1178         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
   1179                       EvenReg, EvenDeadKill, false,
   1180                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
   1181                       Pred, PredReg, TII, isT2);
   1182       } else {
   1183         if (OddReg == EvenReg && EvenDeadKill) {
   1184           // If the two source operands are the same, the kill marker is
   1185           // probably on the first one. e.g.
   1186           // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
   1187           EvenDeadKill = false;
   1188           OddDeadKill = true;
   1189         }
   1190         // Never kill the base register in the first instruction.
   1191         if (EvenReg == BaseReg)
   1192           EvenDeadKill = false;
   1193         InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
   1194                       EvenReg, EvenDeadKill, EvenUndef,
   1195                       BaseReg, false, BaseUndef, false, OffUndef,
   1196                       Pred, PredReg, TII, isT2);
   1197         NewBBI = llvm::prior(MBBI);
   1198         InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
   1199                       OddReg, OddDeadKill, OddUndef,
   1200                       BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
   1201                       Pred, PredReg, TII, isT2);
   1202       }
   1203       if (isLd)
   1204         ++NumLDRD2LDR;
   1205       else
   1206         ++NumSTRD2STR;
   1207     }
   1208 
   1209     MBB.erase(MI);
   1210     MBBI = NewBBI;
   1211     return true;
   1212   }
   1213   return false;
   1214 }
   1215 
   1216 /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
   1217 /// ops of the same base and incrementing offset into LDM / STM ops.
   1218 bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
   1219   unsigned NumMerges = 0;
   1220   unsigned NumMemOps = 0;
   1221   MemOpQueue MemOps;
   1222   unsigned CurrBase = 0;
   1223   int CurrOpc = -1;
   1224   unsigned CurrSize = 0;
   1225   ARMCC::CondCodes CurrPred = ARMCC::AL;
   1226   unsigned CurrPredReg = 0;
   1227   unsigned Position = 0;
   1228   SmallVector<MachineBasicBlock::iterator,4> Merges;
   1229 
   1230   RS->enterBasicBlock(&MBB);
   1231   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   1232   while (MBBI != E) {
   1233     if (FixInvalidRegPairOp(MBB, MBBI))
   1234       continue;
   1235 
   1236     bool Advance  = false;
   1237     bool TryMerge = false;
   1238     bool Clobber  = false;
   1239 
   1240     bool isMemOp = isMemoryOp(MBBI);
   1241     if (isMemOp) {
   1242       int Opcode = MBBI->getOpcode();
   1243       unsigned Size = getLSMultipleTransferSize(MBBI);
   1244       const MachineOperand &MO = MBBI->getOperand(0);
   1245       unsigned Reg = MO.getReg();
   1246       bool isKill = MO.isDef() ? false : MO.isKill();
   1247       unsigned Base = MBBI->getOperand(1).getReg();
   1248       unsigned PredReg = 0;
   1249       ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
   1250       int Offset = getMemoryOpOffset(MBBI);
   1251       // Watch out for:
   1252       // r4 := ldr [r5]
   1253       // r5 := ldr [r5, #4]
   1254       // r6 := ldr [r5, #8]
   1255       //
   1256       // The second ldr has effectively broken the chain even though it
   1257       // looks like the later ldr(s) use the same base register. Try to
   1258       // merge the ldr's so far, including this one. But don't try to
   1259       // combine the following ldr(s).
   1260       Clobber = (isi32Load(Opcode) && Base == MBBI->getOperand(0).getReg());
   1261 
   1262       // Watch out for:
   1263       // r4 := ldr [r0, #8]
   1264       // r4 := ldr [r0, #4]
   1265       //
   1266       // The optimization may reorder the second ldr in front of the first
   1267       // ldr, which violates write after write(WAW) dependence. The same as
   1268       // str. Try to merge inst(s) already in MemOps.
   1269       bool Overlap = false;
   1270       for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end(); I != E; ++I) {
   1271         if (TRI->regsOverlap(Reg, I->MBBI->getOperand(0).getReg())) {
   1272           Overlap = true;
   1273           break;
   1274         }
   1275       }
   1276 
   1277       if (CurrBase == 0 && !Clobber) {
   1278         // Start of a new chain.
   1279         CurrBase = Base;
   1280         CurrOpc  = Opcode;
   1281         CurrSize = Size;
   1282         CurrPred = Pred;
   1283         CurrPredReg = PredReg;
   1284         MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill, Position, MBBI));
   1285         ++NumMemOps;
   1286         Advance = true;
   1287       } else if (!Overlap) {
   1288         if (Clobber) {
   1289           TryMerge = true;
   1290           Advance = true;
   1291         }
   1292 
   1293         if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
   1294           // No need to match PredReg.
   1295           // Continue adding to the queue.
   1296           if (Offset > MemOps.back().Offset) {
   1297             MemOps.push_back(MemOpQueueEntry(Offset, Reg, isKill,
   1298                                              Position, MBBI));
   1299             ++NumMemOps;
   1300             Advance = true;
   1301           } else {
   1302             for (MemOpQueueIter I = MemOps.begin(), E = MemOps.end();
   1303                  I != E; ++I) {
   1304               if (Offset < I->Offset) {
   1305                 MemOps.insert(I, MemOpQueueEntry(Offset, Reg, isKill,
   1306                                                  Position, MBBI));
   1307                 ++NumMemOps;
   1308                 Advance = true;
   1309                 break;
   1310               } else if (Offset == I->Offset) {
   1311                 // Collision! This can't be merged!
   1312                 break;
   1313               }
   1314             }
   1315           }
   1316         }
   1317       }
   1318     }
   1319 
   1320     if (MBBI->isDebugValue()) {
   1321       ++MBBI;
   1322       if (MBBI == E)
   1323         // Reach the end of the block, try merging the memory instructions.
   1324         TryMerge = true;
   1325     } else if (Advance) {
   1326       ++Position;
   1327       ++MBBI;
   1328       if (MBBI == E)
   1329         // Reach the end of the block, try merging the memory instructions.
   1330         TryMerge = true;
   1331     } else
   1332       TryMerge = true;
   1333 
   1334     if (TryMerge) {
   1335       if (NumMemOps > 1) {
   1336         // Try to find a free register to use as a new base in case it's needed.
   1337         // First advance to the instruction just before the start of the chain.
   1338         AdvanceRS(MBB, MemOps);
   1339         // Find a scratch register.
   1340         unsigned Scratch = RS->FindUnusedReg(&ARM::GPRRegClass);
   1341         // Process the load / store instructions.
   1342         RS->forward(prior(MBBI));
   1343 
   1344         // Merge ops.
   1345         Merges.clear();
   1346         MergeLDR_STR(MBB, 0, CurrBase, CurrOpc, CurrSize,
   1347                      CurrPred, CurrPredReg, Scratch, MemOps, Merges);
   1348 
   1349         // Try folding preceding/trailing base inc/dec into the generated
   1350         // LDM/STM ops.
   1351         for (unsigned i = 0, e = Merges.size(); i < e; ++i)
   1352           if (MergeBaseUpdateLSMultiple(MBB, Merges[i], Advance, MBBI))
   1353             ++NumMerges;
   1354         NumMerges += Merges.size();
   1355 
   1356         // Try folding preceding/trailing base inc/dec into those load/store
   1357         // that were not merged to form LDM/STM ops.
   1358         for (unsigned i = 0; i != NumMemOps; ++i)
   1359           if (!MemOps[i].Merged)
   1360             if (MergeBaseUpdateLoadStore(MBB, MemOps[i].MBBI, TII,Advance,MBBI))
   1361               ++NumMerges;
   1362 
   1363         // RS may be pointing to an instruction that's deleted.
   1364         RS->skipTo(prior(MBBI));
   1365       } else if (NumMemOps == 1) {
   1366         // Try folding preceding/trailing base inc/dec into the single
   1367         // load/store.
   1368         if (MergeBaseUpdateLoadStore(MBB, MemOps[0].MBBI, TII, Advance, MBBI)) {
   1369           ++NumMerges;
   1370           RS->forward(prior(MBBI));
   1371         }
   1372       }
   1373 
   1374       CurrBase = 0;
   1375       CurrOpc = -1;
   1376       CurrSize = 0;
   1377       CurrPred = ARMCC::AL;
   1378       CurrPredReg = 0;
   1379       if (NumMemOps) {
   1380         MemOps.clear();
   1381         NumMemOps = 0;
   1382       }
   1383 
   1384       // If iterator hasn't been advanced and this is not a memory op, skip it.
   1385       // It can't start a new chain anyway.
   1386       if (!Advance && !isMemOp && MBBI != E) {
   1387         ++Position;
   1388         ++MBBI;
   1389       }
   1390     }
   1391   }
   1392   return NumMerges > 0;
   1393 }
   1394 
   1395 /// MergeReturnIntoLDM - If this is a exit BB, try merging the return ops
   1396 /// ("bx lr" and "mov pc, lr") into the preceding stack restore so it
   1397 /// directly restore the value of LR into pc.
   1398 ///   ldmfd sp!, {..., lr}
   1399 ///   bx lr
   1400 /// or
   1401 ///   ldmfd sp!, {..., lr}
   1402 ///   mov pc, lr
   1403 /// =>
   1404 ///   ldmfd sp!, {..., pc}
   1405 bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
   1406   if (MBB.empty()) return false;
   1407 
   1408   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   1409   if (MBBI != MBB.begin() &&
   1410       (MBBI->getOpcode() == ARM::BX_RET ||
   1411        MBBI->getOpcode() == ARM::tBX_RET ||
   1412        MBBI->getOpcode() == ARM::MOVPCLR)) {
   1413     MachineInstr *PrevMI = prior(MBBI);
   1414     unsigned Opcode = PrevMI->getOpcode();
   1415     if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
   1416         Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
   1417         Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
   1418       MachineOperand &MO = PrevMI->getOperand(PrevMI->getNumOperands()-1);
   1419       if (MO.getReg() != ARM::LR)
   1420         return false;
   1421       unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
   1422       assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
   1423               Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
   1424       PrevMI->setDesc(TII->get(NewOpc));
   1425       MO.setReg(ARM::PC);
   1426       PrevMI->copyImplicitOps(*MBB.getParent(), &*MBBI);
   1427       MBB.erase(MBBI);
   1428       return true;
   1429     }
   1430   }
   1431   return false;
   1432 }
   1433 
   1434 bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   1435   const TargetMachine &TM = Fn.getTarget();
   1436   AFI = Fn.getInfo<ARMFunctionInfo>();
   1437   TII = TM.getInstrInfo();
   1438   TRI = TM.getRegisterInfo();
   1439   STI = &TM.getSubtarget<ARMSubtarget>();
   1440   RS = new RegScavenger();
   1441   isThumb2 = AFI->isThumb2Function();
   1442 
   1443   bool Modified = false;
   1444   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
   1445        ++MFI) {
   1446     MachineBasicBlock &MBB = *MFI;
   1447     Modified |= LoadStoreMultipleOpti(MBB);
   1448     if (TM.getSubtarget<ARMSubtarget>().hasV5TOps())
   1449       Modified |= MergeReturnIntoLDM(MBB);
   1450   }
   1451 
   1452   delete RS;
   1453   return Modified;
   1454 }
   1455 
   1456 
   1457 /// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
   1458 /// load / stores from consecutive locations close to make it more
   1459 /// likely they will be combined later.
   1460 
   1461 namespace {
   1462   struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
   1463     static char ID;
   1464     ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
   1465 
   1466     const DataLayout *TD;
   1467     const TargetInstrInfo *TII;
   1468     const TargetRegisterInfo *TRI;
   1469     const ARMSubtarget *STI;
   1470     MachineRegisterInfo *MRI;
   1471     MachineFunction *MF;
   1472 
   1473     virtual bool runOnMachineFunction(MachineFunction &Fn);
   1474 
   1475     virtual const char *getPassName() const {
   1476       return "ARM pre- register allocation load / store optimization pass";
   1477     }
   1478 
   1479   private:
   1480     bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
   1481                           unsigned &NewOpc, unsigned &EvenReg,
   1482                           unsigned &OddReg, unsigned &BaseReg,
   1483                           int &Offset,
   1484                           unsigned &PredReg, ARMCC::CondCodes &Pred,
   1485                           bool &isT2);
   1486     bool RescheduleOps(MachineBasicBlock *MBB,
   1487                        SmallVectorImpl<MachineInstr *> &Ops,
   1488                        unsigned Base, bool isLd,
   1489                        DenseMap<MachineInstr*, unsigned> &MI2LocMap);
   1490     bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
   1491   };
   1492   char ARMPreAllocLoadStoreOpt::ID = 0;
   1493 }
   1494 
   1495 bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
   1496   TD  = Fn.getTarget().getDataLayout();
   1497   TII = Fn.getTarget().getInstrInfo();
   1498   TRI = Fn.getTarget().getRegisterInfo();
   1499   STI = &Fn.getTarget().getSubtarget<ARMSubtarget>();
   1500   MRI = &Fn.getRegInfo();
   1501   MF  = &Fn;
   1502 
   1503   bool Modified = false;
   1504   for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
   1505        ++MFI)
   1506     Modified |= RescheduleLoadStoreInstrs(MFI);
   1507 
   1508   return Modified;
   1509 }
   1510 
   1511 static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
   1512                                       MachineBasicBlock::iterator I,
   1513                                       MachineBasicBlock::iterator E,
   1514                                       SmallPtrSet<MachineInstr*, 4> &MemOps,
   1515                                       SmallSet<unsigned, 4> &MemRegs,
   1516                                       const TargetRegisterInfo *TRI) {
   1517   // Are there stores / loads / calls between them?
   1518   // FIXME: This is overly conservative. We should make use of alias information
   1519   // some day.
   1520   SmallSet<unsigned, 4> AddedRegPressure;
   1521   while (++I != E) {
   1522     if (I->isDebugValue() || MemOps.count(&*I))
   1523       continue;
   1524     if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
   1525       return false;
   1526     if (isLd && I->mayStore())
   1527       return false;
   1528     if (!isLd) {
   1529       if (I->mayLoad())
   1530         return false;
   1531       // It's not safe to move the first 'str' down.
   1532       // str r1, [r0]
   1533       // strh r5, [r0]
   1534       // str r4, [r0, #+4]
   1535       if (I->mayStore())
   1536         return false;
   1537     }
   1538     for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
   1539       MachineOperand &MO = I->getOperand(j);
   1540       if (!MO.isReg())
   1541         continue;
   1542       unsigned Reg = MO.getReg();
   1543       if (MO.isDef() && TRI->regsOverlap(Reg, Base))
   1544         return false;
   1545       if (Reg != Base && !MemRegs.count(Reg))
   1546         AddedRegPressure.insert(Reg);
   1547     }
   1548   }
   1549 
   1550   // Estimate register pressure increase due to the transformation.
   1551   if (MemRegs.size() <= 4)
   1552     // Ok if we are moving small number of instructions.
   1553     return true;
   1554   return AddedRegPressure.size() <= MemRegs.size() * 2;
   1555 }
   1556 
   1557 
   1558 /// Copy Op0 and Op1 operands into a new array assigned to MI.
   1559 static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
   1560                                    MachineInstr *Op1) {
   1561   assert(MI->memoperands_empty() && "expected a new machineinstr");
   1562   size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin())
   1563     + (Op1->memoperands_end() - Op1->memoperands_begin());
   1564 
   1565   MachineFunction *MF = MI->getParent()->getParent();
   1566   MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs);
   1567   MachineSDNode::mmo_iterator MemEnd =
   1568     std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin);
   1569   MemEnd =
   1570     std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd);
   1571   MI->setMemRefs(MemBegin, MemEnd);
   1572 }
   1573 
   1574 bool
   1575 ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
   1576                                           DebugLoc &dl,
   1577                                           unsigned &NewOpc, unsigned &EvenReg,
   1578                                           unsigned &OddReg, unsigned &BaseReg,
   1579                                           int &Offset, unsigned &PredReg,
   1580                                           ARMCC::CondCodes &Pred,
   1581                                           bool &isT2) {
   1582   // Make sure we're allowed to generate LDRD/STRD.
   1583   if (!STI->hasV5TEOps())
   1584     return false;
   1585 
   1586   // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
   1587   unsigned Scale = 1;
   1588   unsigned Opcode = Op0->getOpcode();
   1589   if (Opcode == ARM::LDRi12)
   1590     NewOpc = ARM::LDRD;
   1591   else if (Opcode == ARM::STRi12)
   1592     NewOpc = ARM::STRD;
   1593   else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
   1594     NewOpc = ARM::t2LDRDi8;
   1595     Scale = 4;
   1596     isT2 = true;
   1597   } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
   1598     NewOpc = ARM::t2STRDi8;
   1599     Scale = 4;
   1600     isT2 = true;
   1601   } else
   1602     return false;
   1603 
   1604   // Make sure the base address satisfies i64 ld / st alignment requirement.
   1605   // At the moment, we ignore the memoryoperand's value.
   1606   // If we want to use AliasAnalysis, we should check it accordingly.
   1607   if (!Op0->hasOneMemOperand() ||
   1608       (*Op0->memoperands_begin())->isVolatile())
   1609     return false;
   1610 
   1611   unsigned Align = (*Op0->memoperands_begin())->getAlignment();
   1612   const Function *Func = MF->getFunction();
   1613   unsigned ReqAlign = STI->hasV6Ops()
   1614     ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
   1615     : 8;  // Pre-v6 need 8-byte align
   1616   if (Align < ReqAlign)
   1617     return false;
   1618 
   1619   // Then make sure the immediate offset fits.
   1620   int OffImm = getMemoryOpOffset(Op0);
   1621   if (isT2) {
   1622     int Limit = (1 << 8) * Scale;
   1623     if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
   1624       return false;
   1625     Offset = OffImm;
   1626   } else {
   1627     ARM_AM::AddrOpc AddSub = ARM_AM::add;
   1628     if (OffImm < 0) {
   1629       AddSub = ARM_AM::sub;
   1630       OffImm = - OffImm;
   1631     }
   1632     int Limit = (1 << 8) * Scale;
   1633     if (OffImm >= Limit || (OffImm & (Scale-1)))
   1634       return false;
   1635     Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
   1636   }
   1637   EvenReg = Op0->getOperand(0).getReg();
   1638   OddReg  = Op1->getOperand(0).getReg();
   1639   if (EvenReg == OddReg)
   1640     return false;
   1641   BaseReg = Op0->getOperand(1).getReg();
   1642   Pred = getInstrPredicate(Op0, PredReg);
   1643   dl = Op0->getDebugLoc();
   1644   return true;
   1645 }
   1646 
   1647 namespace {
   1648   struct OffsetCompare {
   1649     bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
   1650       int LOffset = getMemoryOpOffset(LHS);
   1651       int ROffset = getMemoryOpOffset(RHS);
   1652       assert(LHS == RHS || LOffset != ROffset);
   1653       return LOffset > ROffset;
   1654     }
   1655   };
   1656 }
   1657 
   1658 bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
   1659                                  SmallVectorImpl<MachineInstr *> &Ops,
   1660                                  unsigned Base, bool isLd,
   1661                                  DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
   1662   bool RetVal = false;
   1663 
   1664   // Sort by offset (in reverse order).
   1665   std::sort(Ops.begin(), Ops.end(), OffsetCompare());
   1666 
   1667   // The loads / stores of the same base are in order. Scan them from first to
   1668   // last and check for the following:
   1669   // 1. Any def of base.
   1670   // 2. Any gaps.
   1671   while (Ops.size() > 1) {
   1672     unsigned FirstLoc = ~0U;
   1673     unsigned LastLoc = 0;
   1674     MachineInstr *FirstOp = 0;
   1675     MachineInstr *LastOp = 0;
   1676     int LastOffset = 0;
   1677     unsigned LastOpcode = 0;
   1678     unsigned LastBytes = 0;
   1679     unsigned NumMove = 0;
   1680     for (int i = Ops.size() - 1; i >= 0; --i) {
   1681       MachineInstr *Op = Ops[i];
   1682       unsigned Loc = MI2LocMap[Op];
   1683       if (Loc <= FirstLoc) {
   1684         FirstLoc = Loc;
   1685         FirstOp = Op;
   1686       }
   1687       if (Loc >= LastLoc) {
   1688         LastLoc = Loc;
   1689         LastOp = Op;
   1690       }
   1691 
   1692       unsigned LSMOpcode
   1693         = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
   1694       if (LastOpcode && LSMOpcode != LastOpcode)
   1695         break;
   1696 
   1697       int Offset = getMemoryOpOffset(Op);
   1698       unsigned Bytes = getLSMultipleTransferSize(Op);
   1699       if (LastBytes) {
   1700         if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
   1701           break;
   1702       }
   1703       LastOffset = Offset;
   1704       LastBytes = Bytes;
   1705       LastOpcode = LSMOpcode;
   1706       if (++NumMove == 8) // FIXME: Tune this limit.
   1707         break;
   1708     }
   1709 
   1710     if (NumMove <= 1)
   1711       Ops.pop_back();
   1712     else {
   1713       SmallPtrSet<MachineInstr*, 4> MemOps;
   1714       SmallSet<unsigned, 4> MemRegs;
   1715       for (int i = NumMove-1; i >= 0; --i) {
   1716         MemOps.insert(Ops[i]);
   1717         MemRegs.insert(Ops[i]->getOperand(0).getReg());
   1718       }
   1719 
   1720       // Be conservative, if the instructions are too far apart, don't
   1721       // move them. We want to limit the increase of register pressure.
   1722       bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
   1723       if (DoMove)
   1724         DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
   1725                                            MemOps, MemRegs, TRI);
   1726       if (!DoMove) {
   1727         for (unsigned i = 0; i != NumMove; ++i)
   1728           Ops.pop_back();
   1729       } else {
   1730         // This is the new location for the loads / stores.
   1731         MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
   1732         while (InsertPos != MBB->end()
   1733                && (MemOps.count(InsertPos) || InsertPos->isDebugValue()))
   1734           ++InsertPos;
   1735 
   1736         // If we are moving a pair of loads / stores, see if it makes sense
   1737         // to try to allocate a pair of registers that can form register pairs.
   1738         MachineInstr *Op0 = Ops.back();
   1739         MachineInstr *Op1 = Ops[Ops.size()-2];
   1740         unsigned EvenReg = 0, OddReg = 0;
   1741         unsigned BaseReg = 0, PredReg = 0;
   1742         ARMCC::CondCodes Pred = ARMCC::AL;
   1743         bool isT2 = false;
   1744         unsigned NewOpc = 0;
   1745         int Offset = 0;
   1746         DebugLoc dl;
   1747         if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
   1748                                              EvenReg, OddReg, BaseReg,
   1749                                              Offset, PredReg, Pred, isT2)) {
   1750           Ops.pop_back();
   1751           Ops.pop_back();
   1752 
   1753           const MCInstrDesc &MCID = TII->get(NewOpc);
   1754           const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
   1755           MRI->constrainRegClass(EvenReg, TRC);
   1756           MRI->constrainRegClass(OddReg, TRC);
   1757 
   1758           // Form the pair instruction.
   1759           if (isLd) {
   1760             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
   1761               .addReg(EvenReg, RegState::Define)
   1762               .addReg(OddReg, RegState::Define)
   1763               .addReg(BaseReg);
   1764             // FIXME: We're converting from LDRi12 to an insn that still
   1765             // uses addrmode2, so we need an explicit offset reg. It should
   1766             // always by reg0 since we're transforming LDRi12s.
   1767             if (!isT2)
   1768               MIB.addReg(0);
   1769             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   1770             concatenateMemOperands(MIB, Op0, Op1);
   1771             DEBUG(dbgs() << "Formed " << *MIB << "\n");
   1772             ++NumLDRDFormed;
   1773           } else {
   1774             MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
   1775               .addReg(EvenReg)
   1776               .addReg(OddReg)
   1777               .addReg(BaseReg);
   1778             // FIXME: We're converting from LDRi12 to an insn that still
   1779             // uses addrmode2, so we need an explicit offset reg. It should
   1780             // always by reg0 since we're transforming STRi12s.
   1781             if (!isT2)
   1782               MIB.addReg(0);
   1783             MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
   1784             concatenateMemOperands(MIB, Op0, Op1);
   1785             DEBUG(dbgs() << "Formed " << *MIB << "\n");
   1786             ++NumSTRDFormed;
   1787           }
   1788           MBB->erase(Op0);
   1789           MBB->erase(Op1);
   1790 
   1791           // Add register allocation hints to form register pairs.
   1792           MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
   1793           MRI->setRegAllocationHint(OddReg,  ARMRI::RegPairOdd, EvenReg);
   1794         } else {
   1795           for (unsigned i = 0; i != NumMove; ++i) {
   1796             MachineInstr *Op = Ops.back();
   1797             Ops.pop_back();
   1798             MBB->splice(InsertPos, MBB, Op);
   1799           }
   1800         }
   1801 
   1802         NumLdStMoved += NumMove;
   1803         RetVal = true;
   1804       }
   1805     }
   1806   }
   1807 
   1808   return RetVal;
   1809 }
   1810 
   1811 bool
   1812 ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
   1813   bool RetVal = false;
   1814 
   1815   DenseMap<MachineInstr*, unsigned> MI2LocMap;
   1816   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
   1817   DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
   1818   SmallVector<unsigned, 4> LdBases;
   1819   SmallVector<unsigned, 4> StBases;
   1820 
   1821   unsigned Loc = 0;
   1822   MachineBasicBlock::iterator MBBI = MBB->begin();
   1823   MachineBasicBlock::iterator E = MBB->end();
   1824   while (MBBI != E) {
   1825     for (; MBBI != E; ++MBBI) {
   1826       MachineInstr *MI = MBBI;
   1827       if (MI->isCall() || MI->isTerminator()) {
   1828         // Stop at barriers.
   1829         ++MBBI;
   1830         break;
   1831       }
   1832 
   1833       if (!MI->isDebugValue())
   1834         MI2LocMap[MI] = ++Loc;
   1835 
   1836       if (!isMemoryOp(MI))
   1837         continue;
   1838       unsigned PredReg = 0;
   1839       if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
   1840         continue;
   1841 
   1842       int Opc = MI->getOpcode();
   1843       bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
   1844       unsigned Base = MI->getOperand(1).getReg();
   1845       int Offset = getMemoryOpOffset(MI);
   1846 
   1847       bool StopHere = false;
   1848       if (isLd) {
   1849         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
   1850           Base2LdsMap.find(Base);
   1851         if (BI != Base2LdsMap.end()) {
   1852           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
   1853             if (Offset == getMemoryOpOffset(BI->second[i])) {
   1854               StopHere = true;
   1855               break;
   1856             }
   1857           }
   1858           if (!StopHere)
   1859             BI->second.push_back(MI);
   1860         } else {
   1861           Base2LdsMap[Base].push_back(MI);
   1862           LdBases.push_back(Base);
   1863         }
   1864       } else {
   1865         DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
   1866           Base2StsMap.find(Base);
   1867         if (BI != Base2StsMap.end()) {
   1868           for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
   1869             if (Offset == getMemoryOpOffset(BI->second[i])) {
   1870               StopHere = true;
   1871               break;
   1872             }
   1873           }
   1874           if (!StopHere)
   1875             BI->second.push_back(MI);
   1876         } else {
   1877           Base2StsMap[Base].push_back(MI);
   1878           StBases.push_back(Base);
   1879         }
   1880       }
   1881 
   1882       if (StopHere) {
   1883         // Found a duplicate (a base+offset combination that's seen earlier).
   1884         // Backtrack.
   1885         --Loc;
   1886         break;
   1887       }
   1888     }
   1889 
   1890     // Re-schedule loads.
   1891     for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
   1892       unsigned Base = LdBases[i];
   1893       SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
   1894       if (Lds.size() > 1)
   1895         RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
   1896     }
   1897 
   1898     // Re-schedule stores.
   1899     for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
   1900       unsigned Base = StBases[i];
   1901       SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
   1902       if (Sts.size() > 1)
   1903         RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
   1904     }
   1905 
   1906     if (MBBI != E) {
   1907       Base2LdsMap.clear();
   1908       Base2StsMap.clear();
   1909       LdBases.clear();
   1910       StBases.clear();
   1911     }
   1912   }
   1913 
   1914   return RetVal;
   1915 }
   1916 
   1917 
   1918 /// createARMLoadStoreOptimizationPass - returns an instance of the load / store
   1919 /// optimization pass.
   1920 FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
   1921   if (PreAlloc)
   1922     return new ARMPreAllocLoadStoreOpt();
   1923   return new ARMLoadStoreOpt();
   1924 }
   1925