//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"

#define GET_INSTRINFO_CTOR
#include "ARMGenInstrInfo.inc"

using namespace llvm;

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conversion"));

static cl::opt<bool>
WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
           cl::desc("Widen ARM vmovs to vmovd when possible"));

static cl::opt<unsigned>
SwiftPartialUpdateClearance("swift-partial-update-clearance",
     cl::Hidden, cl::init(12),
     cl::desc("Clearance before partial register updates"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;     // MLA / MLS opcode
  uint16_t MulOpc;     // Expanded multiplication opcode
  uint16_t AddSubOpc;  // Expanded add / sub opcode
  bool NegAcc;         // True if the accumulator is negated before the add / sub.
  bool HasLane;        // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,          MulOpc,           AddSubOpc,       NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,       ARM::VMULS,       ARM::VADDS,      false,  false },
  { ARM::VMLSS,       ARM::VMULS,       ARM::VSUBS,      false,  false },
  { ARM::VMLAD,       ARM::VMULD,       ARM::VADDD,      false,  false },
  { ARM::VMLSD,       ARM::VMULD,       ARM::VSUBD,      false,  false },
  { ARM::VNMLAS,      ARM::VNMULS,      ARM::VSUBS,      true,   false },
  { ARM::VNMLSS,      ARM::VMULS,       ARM::VSUBS,      true,   false },
  { ARM::VNMLAD,      ARM::VNMULD,      ARM::VSUBD,      true,   false },
  { ARM::VNMLSD,      ARM::VMULD,       ARM::VSUBD,      true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,      ARM::VMULfd,      ARM::VADDfd,     false,  false },
  { ARM::VMLSfd,      ARM::VMULfd,      ARM::VSUBfd,     false,  false },
  { ARM::VMLAfq,      ARM::VMULfq,      ARM::VADDfq,     false,  false },
  { ARM::VMLSfq,      ARM::VMULfq,      ARM::VSUBfq,     false,  false },
  { ARM::VMLAslfd,    ARM::VMULslfd,    ARM::VADDfd,     false,  true  },
  { ARM::VMLSslfd,    ARM::VMULslfd,    ARM::VSUBfd,     false,  true  },
  { ARM::VMLAslfq,    ARM::VMULslfq,    ARM::VADDfq,     false,  true  },
  { ARM::VMLSslfq,    ARM::VMULslfq,    ARM::VSUBfq,     false,  true  },
};
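// For example, the VMLAS entry above says a scalar multiply-accumulate
//   vmla.f32 s0, s1, s2
// can be expanded into the equivalent pair (sTmp stands for a scratch
// register the expander would pick; illustrative only):
//   vmul.f32 sTmp, s1, s2
//   vadd.f32 s0, s0, sTmp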

ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
  : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
    Subtarget(STI) {
  for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      assert(false && "Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetHazardRecognizer(const TargetMachine *TM,
                             const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II = TM->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG);
}

ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  if (Subtarget.isThumb2() || Subtarget.hasVFP2())
    return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG);
  return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                        MachineBasicBlock::iterator &MBBI,
                                        LiveVariables *LV) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return NULL;

  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  uint64_t TSFlags = MI->getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return NULL;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
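  // For example (illustrative), a pre-indexed load
  //   ldr r0, [r1, #4]!
  // becomes
  //   add r1, r1, #4
  //   ldr r0, [r1]
  // and the post-indexed "ldr r0, [r1], #4" becomes the same two
  // instructions in the opposite order.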
  unsigned MemOpc = getUnindexedOpcode(MI->getOpcode());
  if (MemOpc == 0)
    return NULL;

  MachineInstr *UpdateMI = NULL;
  MachineInstr *MemMI = NULL;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI->getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI->mayStore();
  const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
  const MachineOperand &Base = MI->getOperand(2);
  const MachineOperand &Offset = MI->getOperand(NumOps-3);
  unsigned WBReg = WB.getReg();
  unsigned BaseReg = Base.getReg();
  unsigned OffReg = Offset.getReg();
  unsigned OffImm = MI->getOperand(NumOps-2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NumOps-1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return NULL;
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
        .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
        .addImm(Pred).addReg(0).addReg(0);
    } else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  case ARMII::AddrMode3 : {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // The immediate is 8 bits; it's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
        .addReg(BaseReg).addImm(Amt)
        .addImm(Pred).addReg(0).addReg(0);
    else
      UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
        .addReg(BaseReg).addReg(OffReg)
        .addImm(Pred).addReg(0).addReg(0);
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(WBReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(WBReg).addReg(0).addImm(0).addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc), MI->getOperand(0).getReg())
        .addReg(BaseReg).addImm(0).addImm(Pred);
    else
      MemMI = BuildMI(MF, MI->getDebugLoc(),
                      get(MemOpc)).addReg(MI->getOperand(1).getReg())
        .addReg(BaseReg).addReg(0).addImm(0).addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
        unsigned Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MIs in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg))
              continue;
            LV->addVirtualRegisterKilled(Reg, NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MFI->insert(MBBI, NewMIs[1]);
  MFI->insert(MBBI, NewMIs[0]);
  return NewMIs[0];
}

// Branch analysis.
bool
ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
                                MachineBasicBlock *&FBB,
                                SmallVectorImpl<MachineOperand> &Cond,
                                bool AllowModify) const {
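  // On success the outputs describe the block's branching; e.g. for a block
  // ending in
  //   bne %bb.1
  //   b   %bb.2
  // this returns false with TBB = %bb.1, FBB = %bb.2, and Cond = {NE, CPSR}
  // (block names illustrative).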
  TBB = 0;
  FBB = 0;

  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(I) || I->isTerminator()) {

    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up on the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values and predicated nonterminators.
    while (I->isDebugValue() || !I->isTerminator()) {
      if (I == MBB.begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = !isPredicated(I);
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    //                returns.
    if (!isPredicated(I) &&
          (isUncondBranchOpcode(I->getOpcode()) ||
           isIndirectBranchOpcode(I->getOpcode()) ||
           isJumpTableBranchOpcode(I->getOpcode()) ||
           I->isReturn())) {
      // Forget any previous conditional branch information - it no longer
      // applies.
      Cond.clear();
      FBB = 0;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = llvm::next(I);
        while (DI != MBB.end()) {
          MachineInstr *InstToDelete = DI;
          ++DI;
          InstToDelete->eraseFromParent();
        }
      }
    }

    if (CantAnalyze)
      return true;

    if (I == MBB.begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}


unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  if (I == MBB.begin()) return 0;
  --I;
  while (I->isDebugValue()) {
    if (I == MBB.begin())
      return 0;
    --I;
  }
  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()))
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()))
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned
ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                               MachineBasicBlock *FBB,
                               const SmallVectorImpl<MachineOperand> &Cond,
                               DebugLoc DL) const {
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc   = !AFI->isThumbFunction()
    ? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
    ? ARM::Bcc : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0) &&
         "ARM branch conditions have two components!");
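  // E.g. (illustrative) for non-empty Cond == {ARMCC::NE, CPSR} with both TBB
  // and FBB set, an ARM-mode function gets "bne TBB; b FBB" and we return 2.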

  if (FBB == 0) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).addImm(ARMCC::AL).addReg(0);
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else
      BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
        .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
    return 1;
  }

  // Two-way conditional branch.
  BuildMI(&MBB, DL, get(BccOpc)).addMBB(TBB)
    .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).addImm(ARMCC::AL).addReg(0);
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
  Cond[0].setImm(ARMCC::getOppositeCondition(CC));
  return false;
}

bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
  if (MI->isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI;
    MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
}

bool ARMBaseInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  unsigned Opc = MI->getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI->setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI->getParent()->getParent(), MI)
      .addImm(Pred[0].getImm())
      .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI->findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI->getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI->getOperand(PIdx+1).setReg(Pred[1].getReg());
    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                  const SmallVectorImpl<MachineOperand> &Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

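  // Pred1 subsumes Pred2 when every condition-flag state satisfying Pred2
  // also satisfies Pred1; e.g. HS (unsigned >=) subsumes HI (unsigned >),
  // and AL subsumes everything.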
  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}

bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                    std::vector<MachineOperand> &Pred) const {
  bool Found = false;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if ((MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) ||
        (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)) {
      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
  if (!MI->isPredicable())
    return false;

  if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
    ARMFunctionInfo *AFI =
      MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
    return AFI->isThumb2Function();
  }
  return true;
}

/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
LLVM_ATTRIBUTE_NOINLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                unsigned JTI) {
  assert(JTI < JT.size());
  return JT[JTI].MBBs.size();
}

/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const MachineBasicBlock &MBB = *MI->getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getSize())
    return MCID.getSize();

  // If this machine instr is an inline asm, measure it.
  if (MI->getOpcode() == ARM::INLINEASM)
    return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
  if (MI->isLabel())
    return 0;
  unsigned Opc = MI->getOpcode();
  switch (Opc) {
  case TargetOpcode::IMPLICIT_DEF:
  case TargetOpcode::KILL:
  case TargetOpcode::PROLOG_LABEL:
  case TargetOpcode::EH_LABEL:
  case TargetOpcode::DBG_VALUE:
    return 0;
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::MOVi16_ga_pcrel:
  case ARM::MOVTi16_ga_pcrel:
  case ARM::t2MOVi16_ga_pcrel:
  case ARM::t2MOVTi16_ga_pcrel:
    return 4;
  case ARM::MOVi32imm:
  case ARM::t2MOVi32imm:
    return 8;
  case ARM::CONSTPOOL_ENTRY:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI->getOperand(2).getImm();
  case ARM::Int_eh_sjlj_longjmp:
    return 16;
  case ARM::tInt_eh_sjlj_longjmp:
    return 10;
  case ARM::Int_eh_sjlj_setjmp:
  case ARM::Int_eh_sjlj_setjmp_nofp:
    return 20;
  case ARM::tInt_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp:
  case ARM::t2Int_eh_sjlj_setjmp_nofp:
    return 12;
  case ARM::BR_JTr:
  case ARM::BR_JTm:
  case ARM::BR_JTadd:
  case ARM::tBR_JTr:
  case ARM::t2BR_JT:
  case ARM::t2TBB_JT:
  case ARM::t2TBH_JT: {
    // These are jumptable branches, i.e. a branch followed by an inlined
    // jumptable. The size is 4 + 4 * number of entries. For TBB, each
    // entry is one byte; for TBH, each entry is two bytes.
    unsigned EntrySize = (Opc == ARM::t2TBB_JT)
      ? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
    unsigned NumOps = MCID.getNumOperands();
    MachineOperand JTOP =
      MI->getOperand(NumOps - (MI->isPredicable() ? 3 : 2));
    unsigned JTI = JTOP.getIndex();
    const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
    assert(MJTI != 0);
    const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
    assert(JTI < JT.size());
    // Thumb instructions are 2-byte aligned, but JT entries are 4-byte
    // aligned. The assembler / linker may add 2 bytes of padding just before
    // the JT entries.  The size does not include this padding; the
    // constant islands pass does separate bookkeeping for it.
    // FIXME: If we know the size of the function is less than (1 << 16) *2
    // bytes, we can use 16-bit entries instead. Then there won't be an
    // alignment issue.
    unsigned InstSize = (Opc == ARM::tBR_JTr || Opc == ARM::t2BR_JT) ? 2 : 4;
    unsigned NumEntries = getNumJTEntries(JT, JTI);
    if (Opc == ARM::t2TBB_JT && (NumEntries & 1))
      // Make sure the instruction that follows TBB is 2-byte aligned.
      // FIXME: Constant island pass should insert an "ALIGN" instruction
      // instead.
      ++NumEntries;
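    // Illustrative size computation: a t2TBB_JT with 5 one-byte entries is
    // padded to 6, so its total is 6 * 1 + 4 = 10 bytes.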
    return NumEntries * EntrySize + InstSize;
  }
  default:
    // Otherwise, pseudo-instruction sizes are zero.
    return 0;
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr *MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI;
  MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += GetInstSizeInBytes(&*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I, DebugLoc DL,
                                   unsigned DestReg, unsigned SrcReg,
                                   bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc  = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
                                  .addReg(SrcReg, getKillRegState(KillSrc))));
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc  = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPred(MIB);
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;

  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2;
  else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4;
  // Fall back to VMOVD.
  else if (ARM::DPairRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2;
  else if (ARM::DTripleRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3;
  else if (ARM::DQuadRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4;
  else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg))
    Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2;

  else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2;
  else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2;
  else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg))
    Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2;

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs-1)*Spacing);
    Spacing = -Spacing;
  }
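  // Example (illustrative): copying the D-triple d0_d1_d2 into d1_d2_d3
  // forward would clobber d1 before it is read, so the loop below runs
  // backward instead: d3 = d2, then d2 = d1, then d1 = d0.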
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing);
    unsigned Src = TRI->getSubReg(SrcReg,  BeginIdx + i*Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst)
      .addReg(Src);
    // VORR takes two source operands.
    if (Opc == ARM::VORRq)
      Mov.addReg(Src);
    Mov = AddDefaultPred(Mov);
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = AddDefaultCC(Mov);
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}

void ARMBaseInstrInfo::
storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                    unsigned SrcReg, bool isKill, int FI,
                    const TargetRegisterClass *RC,
                    const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);

  MachineMemOperand *MMO =
    MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOStore,
                            MFI.getObjectSize(FI),
                            Align);

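  // E.g. a plain GPR spill takes the STRi12 case below and emits, in effect,
  // "str rSrc, [<frame index>, #0]" with the default AL predicate
  // (illustrative).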
  switch (RC->getSize()) {
    case 4:
      if (ARM::GPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STRi12))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 8:
      if (ARM::DPRRegClass.hasSubClassEq(RC)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
                   .addReg(SrcReg, getKillRegState(isKill))
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
      } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
        if (Subtarget.hasV5TEOps()) {
          MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
          MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

          AddDefaultPred(MIB);
        } else {
          // Fall back to the STM instruction, which has existed since the
          // dawn of time.
          MachineInstrBuilder MIB =
            AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
                             .addFrameIndex(FI).addMemOperand(MMO));
          AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
          AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 16:
      if (ARM::DPairRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1q64))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMQIA))
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addFrameIndex(FI)
                     .addMemOperand(MMO));
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 24:
      if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
        // Use aligned spills if the stack can be realigned.
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64TPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 32:
      if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
        if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
          // FIXME: It's possible to only store part of the QQ register if the
          // spilled def has a sub-register index.
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VST1d64QPseudo))
                     .addFrameIndex(FI).addImm(16)
                     .addReg(SrcReg, getKillRegState(isKill))
                     .addMemOperand(MMO));
        } else {
          MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
          MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
                AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        }
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    case 64:
      if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTMDIA))
                         .addFrameIndex(FI))
                         .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
              AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
      } else
        llvm_unreachable("Unknown reg class!");
      break;
    default:
      llvm_unreachable("Unknown reg class!");
  }
}

unsigned
ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                     int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI->getOperand(0).isFI() &&
        MI->getOperand(2).getSubReg() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayStore() && hasStoreToStackSlot(MI, Dummy, FrameIndex);
}

void ARMBaseInstrInfo::
loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                     unsigned DestReg, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = *MF.getFrameInfo();
  unsigned Align = MFI.getObjectAlignment(FI);
  MachineMemOperand *MMO =
    MF.getMachineMemOperand(
                    MachinePointerInfo::getFixedStack(FI),
                            MachineMemOperand::MOLoad,
                            MFI.getObjectSize(FI),
                            Align);

  switch (RC->getSize()) {
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));

    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
                   .addFrameIndex(FI).addImm(0).addMemOperand(MMO));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);

        AddDefaultPred(MIB);
      } else {
        // Fall back to the LDM instruction, which has existed since the
        // dawn of time.
        MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
                                 .addFrameIndex(FI).addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
                       .addFrameIndex(FI)
                       .addMemOperand(MMO));
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
          AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                         .addFrameIndex(FI)
                         .addMemOperand(MMO));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
   case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) || ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Align >= 16 && getRegisterInfo().canRealignStack(MF)) {
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
                     .addFrameIndex(FI).addImm(16)
                     .addMemOperand(MMO));
      } else {
        MachineInstrBuilder MIB =
        AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                       .addFrameIndex(FI))
                       .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (TargetRegisterInfo::isPhysicalRegister(DestReg))
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB =
      AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                     .addFrameIndex(FI))
                     .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (TargetRegisterInfo::isPhysicalRegister(DestReg))
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}

unsigned
ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                      int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs:  // FIXME: don't use t2LDRs to access frame.
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isReg() &&
        MI->getOperand(3).isImm() &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(2).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI->getOperand(1).isFI() &&
        MI->getOperand(0).getSubReg() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

unsigned ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                             int &FrameIndex) const {
  const MachineMemOperand *Dummy;
  return MI->mayLoad() && hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const {
  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
  // widened to VMOVD.  We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
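  // For example (illustrative), when s0 and s2 are the low halves of d0 and
  // d1, the copy "vmov.f32 s0, s2" can be widened to "vmov.f64 d0, d1".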
   1143   if (!WidenVMOVS || !MI->isCopy() || Subtarget.isCortexA15())
   1144     return false;
   1145 
   1146   // Look for a copy between even S-registers.  That is where we keep floats
   1147   // when using NEON v2f32 instructions for f32 arithmetic.
   1148   unsigned DstRegS = MI->getOperand(0).getReg();
   1149   unsigned SrcRegS = MI->getOperand(1).getReg();
   1150   if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
   1151     return false;
   1152 
   1153   const TargetRegisterInfo *TRI = &getRegisterInfo();
   1154   unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
   1155                                               &ARM::DPRRegClass);
   1156   unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
   1157                                               &ARM::DPRRegClass);
   1158   if (!DstRegD || !SrcRegD)
   1159     return false;
   1160 
   1161   // We want to widen this into a DstRegD = VMOVD SrcRegD copy.  This is only
   1162   // legal if the COPY already defines the full DstRegD, and it isn't a
   1163   // sub-register insertion.
   1164   if (!MI->definesRegister(DstRegD, TRI) || MI->readsRegister(DstRegD, TRI))
   1165     return false;
   1166 
   1167   // A dead copy shouldn't show up here, but reject it just in case.
   1168   if (MI->getOperand(0).isDead())
   1169     return false;
   1170 
   1171   // All clear, widen the COPY.
   1172   DEBUG(dbgs() << "widening:    " << *MI);
   1173   MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
   1174 
   1175   // Get rid of the old <imp-def> of DstRegD.  Leave it if it defines a Q-reg
   1176   // or some other super-register.
   1177   int ImpDefIdx = MI->findRegisterDefOperandIdx(DstRegD);
   1178   if (ImpDefIdx != -1)
   1179     MI->RemoveOperand(ImpDefIdx);
   1180 
   1181   // Change the opcode and operands.
   1182   MI->setDesc(get(ARM::VMOVD));
   1183   MI->getOperand(0).setReg(DstRegD);
   1184   MI->getOperand(1).setReg(SrcRegD);
   1185   AddDefaultPred(MIB);
   1186 
   1187   // We are now reading SrcRegD instead of SrcRegS.  This may upset the
   1188   // register scavenger and machine verifier, so we need to indicate that we
   1189   // are reading an undefined value from SrcRegD, but a proper value from
   1190   // SrcRegS.
   1191   MI->getOperand(1).setIsUndef();
   1192   MIB.addReg(SrcRegS, RegState::Implicit);
   1193 
   1194   // SrcRegD may actually contain an unrelated value in the ssub_1
   1195   // sub-register.  Don't kill it.  Only kill the ssub_0 sub-register.
   1196   if (MI->getOperand(1).isKill()) {
   1197     MI->getOperand(1).setIsKill(false);
   1198     MI->addRegisterKilled(SrcRegS, TRI, true);
   1199   }
   1200 
   1201   DEBUG(dbgs() << "replaced by: " << *MI);
   1202   return true;
   1203 }
   1204 
   1205 /// Create a copy of a const pool value. Update CPI to the new index and return
   1206 /// the label UID.
   1207 static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
   1208   MachineConstantPool *MCP = MF.getConstantPool();
   1209   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   1210 
   1211   const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
   1212   assert(MCPE.isMachineConstantPoolEntry() &&
   1213          "Expecting a machine constantpool entry!");
   1214   ARMConstantPoolValue *ACPV =
   1215     static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
   1216 
   1217   unsigned PCLabelId = AFI->createPICLabelUId();
   1218   ARMConstantPoolValue *NewCPV = 0;
   1219   // FIXME: The below assumes PIC relocation model and that the function
   1220   // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
   1221   // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
   1222   // instructions, so that's probably OK, but is PIC always correct when
   1223   // we get here?
   1224   if (ACPV->isGlobalValue())
   1225     NewCPV = ARMConstantPoolConstant::
   1226       Create(cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId,
   1227              ARMCP::CPValue, 4);
   1228   else if (ACPV->isExtSymbol())
   1229     NewCPV = ARMConstantPoolSymbol::
   1230       Create(MF.getFunction()->getContext(),
   1231              cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
   1232   else if (ACPV->isBlockAddress())
   1233     NewCPV = ARMConstantPoolConstant::
   1234       Create(cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
   1235              ARMCP::CPBlockAddress, 4);
   1236   else if (ACPV->isLSDA())
   1237     NewCPV = ARMConstantPoolConstant::Create(MF.getFunction(), PCLabelId,
   1238                                              ARMCP::CPLSDA, 4);
   1239   else if (ACPV->isMachineBasicBlock())
   1240     NewCPV = ARMConstantPoolMBB::
   1241       Create(MF.getFunction()->getContext(),
   1242              cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
   1243   else
   1244     llvm_unreachable("Unexpected ARM constantpool value type!!");
   1245   CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
   1246   return PCLabelId;
   1247 }
   1248 
   1249 void ARMBaseInstrInfo::
   1250 reMaterialize(MachineBasicBlock &MBB,
   1251               MachineBasicBlock::iterator I,
   1252               unsigned DestReg, unsigned SubIdx,
   1253               const MachineInstr *Orig,
   1254               const TargetRegisterInfo &TRI) const {
   1255   unsigned Opcode = Orig->getOpcode();
   1256   switch (Opcode) {
   1257   default: {
   1258     MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
   1259     MI->substituteRegister(Orig->getOperand(0).getReg(), DestReg, SubIdx, TRI);
   1260     MBB.insert(I, MI);
   1261     break;
   1262   }
   1263   case ARM::tLDRpci_pic:
   1264   case ARM::t2LDRpci_pic: {
   1265     MachineFunction &MF = *MBB.getParent();
   1266     unsigned CPI = Orig->getOperand(1).getIndex();
   1267     unsigned PCLabelId = duplicateCPV(MF, CPI);
   1268     MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
   1269                                       DestReg)
   1270       .addConstantPoolIndex(CPI).addImm(PCLabelId);
   1271     MIB->setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
   1272     break;
   1273   }
   1274   }
   1275 }
   1276 
   1277 MachineInstr *
   1278 ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
   1279   MachineInstr *MI = TargetInstrInfo::duplicate(Orig, MF);
   1280   switch(Orig->getOpcode()) {
   1281   case ARM::tLDRpci_pic:
   1282   case ARM::t2LDRpci_pic: {
   1283     unsigned CPI = Orig->getOperand(1).getIndex();
   1284     unsigned PCLabelId = duplicateCPV(MF, CPI);
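            // duplicateCPV updated CPI to a fresh constant pool entry; give that
            // to Orig, and let the returned clone keep the original entry.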
   1285     Orig->getOperand(1).setIndex(CPI);
   1286     Orig->getOperand(2).setImm(PCLabelId);
   1287     break;
   1288   }
   1289   }
   1290   return MI;
   1291 }
   1292 
   1293 bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
   1294                                         const MachineInstr *MI1,
   1295                                         const MachineRegisterInfo *MRI) const {
   1296   int Opcode = MI0->getOpcode();
   1297   if (Opcode == ARM::t2LDRpci ||
   1298       Opcode == ARM::t2LDRpci_pic ||
   1299       Opcode == ARM::tLDRpci ||
   1300       Opcode == ARM::tLDRpci_pic ||
   1301       Opcode == ARM::MOV_ga_dyn ||
   1302       Opcode == ARM::MOV_ga_pcrel ||
   1303       Opcode == ARM::MOV_ga_pcrel_ldr ||
   1304       Opcode == ARM::t2MOV_ga_dyn ||
   1305       Opcode == ARM::t2MOV_ga_pcrel) {
   1306     if (MI1->getOpcode() != Opcode)
   1307       return false;
   1308     if (MI0->getNumOperands() != MI1->getNumOperands())
   1309       return false;
   1310 
   1311     const MachineOperand &MO0 = MI0->getOperand(1);
   1312     const MachineOperand &MO1 = MI1->getOperand(1);
   1313     if (MO0.getOffset() != MO1.getOffset())
   1314       return false;
   1315 
   1316     if (Opcode == ARM::MOV_ga_dyn ||
   1317         Opcode == ARM::MOV_ga_pcrel ||
   1318         Opcode == ARM::MOV_ga_pcrel_ldr ||
   1319         Opcode == ARM::t2MOV_ga_dyn ||
   1320         Opcode == ARM::t2MOV_ga_pcrel)
   1321       // Ignore the PC labels.
   1322       return MO0.getGlobal() == MO1.getGlobal();
   1323 
   1324     const MachineFunction *MF = MI0->getParent()->getParent();
   1325     const MachineConstantPool *MCP = MF->getConstantPool();
   1326     int CPI0 = MO0.getIndex();
   1327     int CPI1 = MO1.getIndex();
   1328     const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
   1329     const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
   1330     bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
   1331     bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
   1332     if (isARMCP0 && isARMCP1) {
   1333       ARMConstantPoolValue *ACPV0 =
   1334         static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
   1335       ARMConstantPoolValue *ACPV1 =
   1336         static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
   1337       return ACPV0->hasSameValue(ACPV1);
   1338     } else if (!isARMCP0 && !isARMCP1) {
   1339       return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
   1340     }
   1341     return false;
   1342   } else if (Opcode == ARM::PICLDR) {
   1343     if (MI1->getOpcode() != Opcode)
   1344       return false;
   1345     if (MI0->getNumOperands() != MI1->getNumOperands())
   1346       return false;
   1347 
   1348     unsigned Addr0 = MI0->getOperand(1).getReg();
   1349     unsigned Addr1 = MI1->getOperand(1).getReg();
   1350     if (Addr0 != Addr1) {
   1351       if (!MRI ||
   1352           !TargetRegisterInfo::isVirtualRegister(Addr0) ||
   1353           !TargetRegisterInfo::isVirtualRegister(Addr1))
   1354         return false;
   1355 
   1356       // This assumes SSA form.
   1357       MachineInstr *Def0 = MRI->getVRegDef(Addr0);
   1358       MachineInstr *Def1 = MRI->getVRegDef(Addr1);
   1359       // Check if the loaded values, e.g. a constantpool entry of a global
   1360       // address, are the same.
   1361       if (!produceSameValue(Def0, Def1, MRI))
   1362         return false;
   1363     }
   1364 
   1365     for (unsigned i = 3, e = MI0->getNumOperands(); i != e; ++i) {
   1366       // %vreg12<def> = PICLDR %vreg11, 0, pred:14, pred:%noreg
   1367       const MachineOperand &MO0 = MI0->getOperand(i);
   1368       const MachineOperand &MO1 = MI1->getOperand(i);
   1369       if (!MO0.isIdenticalTo(MO1))
   1370         return false;
   1371     }
   1372     return true;
   1373   }
   1374 
   1375   return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
   1376 }
   1377 
   1378 /// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
   1379 /// determine if two loads are loading from the same base address. It should
   1380 /// only return true if the base pointers are the same and the only difference
   1381 /// between the two addresses is the offset. It also returns the offsets by
   1382 /// reference.
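        /// For example, two LDRi12 loads of [%r1, #4] and [%r1, #8] share the
        /// base %r1, so this returns true with Offset1 == 4 and Offset2 == 8.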
   1383 ///
   1384 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
   1385 /// is permanently disabled.
   1386 bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
   1387                                                int64_t &Offset1,
   1388                                                int64_t &Offset2) const {
   1389   // Don't worry about Thumb: just ARM and Thumb2.
   1390   if (Subtarget.isThumb1Only()) return false;
   1391 
   1392   if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
   1393     return false;
   1394 
   1395   switch (Load1->getMachineOpcode()) {
   1396   default:
   1397     return false;
   1398   case ARM::LDRi12:
   1399   case ARM::LDRBi12:
   1400   case ARM::LDRD:
   1401   case ARM::LDRH:
   1402   case ARM::LDRSB:
   1403   case ARM::LDRSH:
   1404   case ARM::VLDRD:
   1405   case ARM::VLDRS:
   1406   case ARM::t2LDRi8:
   1407   case ARM::t2LDRDi8:
   1408   case ARM::t2LDRSHi8:
   1409   case ARM::t2LDRi12:
   1410   case ARM::t2LDRSHi12:
   1411     break;
   1412   }
   1413 
   1414   switch (Load2->getMachineOpcode()) {
   1415   default:
   1416     return false;
   1417   case ARM::LDRi12:
   1418   case ARM::LDRBi12:
   1419   case ARM::LDRD:
   1420   case ARM::LDRH:
   1421   case ARM::LDRSB:
   1422   case ARM::LDRSH:
   1423   case ARM::VLDRD:
   1424   case ARM::VLDRS:
   1425   case ARM::t2LDRi8:
   1426   case ARM::t2LDRSHi8:
   1427   case ARM::t2LDRi12:
   1428   case ARM::t2LDRSHi12:
   1429     break;
   1430   }
   1431 
   1432   // Check if base addresses and chain operands match.
   1433   if (Load1->getOperand(0) != Load2->getOperand(0) ||
   1434       Load1->getOperand(4) != Load2->getOperand(4))
   1435     return false;
   1436 
   1437   // Index should be Reg0.
   1438   if (Load1->getOperand(3) != Load2->getOperand(3))
   1439     return false;
   1440 
   1441   // Determine the offsets.
   1442   if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
   1443       isa<ConstantSDNode>(Load2->getOperand(1))) {
   1444     Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
   1445     Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
   1446     return true;
   1447   }
   1448 
   1449   return false;
   1450 }
   1451 
   1452 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
   1453 /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
   1454 /// be scheduled together. On some targets, if two loads are loading from
   1455 /// addresses in the same cache line, it's better if they are scheduled
   1456 /// together. This function takes two integers that represent the load offsets
   1457 /// from the common base address. It returns true if it decides it's desirable
   1458 /// to schedule the two loads together. "NumLoads" is the number of loads that
   1459 /// have already been scheduled after Load1.
   1460 ///
   1461 /// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
   1462 /// is permanently disabled.
   1463 bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
   1464                                                int64_t Offset1, int64_t Offset2,
   1465                                                unsigned NumLoads) const {
   1466   // Don't worry about Thumb: just ARM and Thumb2.
   1467   if (Subtarget.isThumb1Only()) return false;
   1468 
   1469   assert(Offset2 > Offset1);
   1470 
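          // Loads more than ~512 bytes apart are unlikely to share a cache line,
          // so don't try to schedule them together.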
   1471   if ((Offset2 - Offset1) / 8 > 64)
   1472     return false;
   1473 
   1474   if (Load1->getMachineOpcode() != Load2->getMachineOpcode())
   1475     return false;  // FIXME: overly conservative?
   1476 
   1477   // Four loads in a row should be sufficient.
   1478   if (NumLoads >= 3)
   1479     return false;
   1480 
   1481   return true;
   1482 }
   1483 
   1484 bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
   1485                                             const MachineBasicBlock *MBB,
   1486                                             const MachineFunction &MF) const {
   1487   // Debug info is never a scheduling boundary. It's necessary to be explicit
   1488   // due to the special treatment of IT instructions below, otherwise a
   1489   // dbg_value followed by an IT will result in the IT instruction being
   1490   // considered a scheduling hazard, which is wrong. It should be the actual
   1491   // instruction preceding the dbg_value instruction(s), just like it is
   1492   // when debug info is not present.
   1493   if (MI->isDebugValue())
   1494     return false;
   1495 
   1496   // Terminators and labels can't be scheduled around.
   1497   if (MI->isTerminator() || MI->isLabel())
   1498     return true;
   1499 
   1500   // Treat the start of the IT block as a scheduling boundary, but schedule
   1501   // t2IT along with all instructions following it.
   1502   // FIXME: This is a big hammer. But the alternative is to add all potential
   1503   // true and anti dependencies to IT block instructions as implicit operands
   1504   // to the t2IT instruction. The added compile time and complexity does not
   1505   // seem worth it.
   1506   MachineBasicBlock::const_iterator I = MI;
   1507   // Make sure to skip any dbg_value instructions.
   1508   while (++I != MBB->end() && I->isDebugValue())
   1509     ;
   1510   if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
   1511     return true;
   1512 
   1513   // Don't attempt to schedule around any instruction that defines
   1514   // a stack-oriented pointer, as it's unlikely to be profitable. This
   1515   // saves compile time, because it doesn't require every single
   1516   // stack slot reference to depend on the instruction that does the
   1517   // modification.
   1518   // Calls don't actually change the stack pointer, even if they have imp-defs.
   1519   // No ARM calling conventions change the stack pointer. (X86 calling
   1520   // conventions sometimes do).
   1521   if (!MI->isCall() && MI->definesRegister(ARM::SP))
   1522     return true;
   1523 
   1524   return false;
   1525 }
   1526 
   1527 bool ARMBaseInstrInfo::
   1528 isProfitableToIfCvt(MachineBasicBlock &MBB,
   1529                     unsigned NumCycles, unsigned ExtraPredCycles,
   1530                     const BranchProbability &Probability) const {
   1531   if (!NumCycles)
   1532     return false;
   1533 
   1534   // Attempt to estimate the relative costs of predication versus branching.
   1535   unsigned UnpredCost = Probability.getNumerator() * NumCycles;
   1536   UnpredCost /= Probability.getDenominator();
   1537   UnpredCost += 1; // The branch itself
   1538   UnpredCost += Subtarget.getMispredictionPenalty() / 10;
   1539 
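          // E.g. with NumCycles == 4, Probability == 1/2 and a misprediction
          // penalty of 10 cycles: UnpredCost == 4/2 + 1 + 1 == 4, so predication
          // pays off only while NumCycles + ExtraPredCycles <= 4.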
   1540   return (NumCycles + ExtraPredCycles) <= UnpredCost;
   1541 }
   1542 
   1543 bool ARMBaseInstrInfo::
   1544 isProfitableToIfCvt(MachineBasicBlock &TMBB,
   1545                     unsigned TCycles, unsigned TExtra,
   1546                     MachineBasicBlock &FMBB,
   1547                     unsigned FCycles, unsigned FExtra,
   1548                     const BranchProbability &Probability) const {
   1549   if (!TCycles || !FCycles)
   1550     return false;
   1551 
   1552   // Attempt to estimate the relative costs of predication versus branching.
   1553   unsigned TUnpredCost = Probability.getNumerator() * TCycles;
   1554   TUnpredCost /= Probability.getDenominator();
   1555 
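          // Weight the false-path cycles by the complementary probability.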
   1556   uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
   1557   unsigned FUnpredCost = Comp * FCycles;
   1558   FUnpredCost /= Probability.getDenominator();
   1559 
   1560   unsigned UnpredCost = TUnpredCost + FUnpredCost;
   1561   UnpredCost += 1; // The branch itself
   1562   UnpredCost += Subtarget.getMispredictionPenalty() / 10;
   1563 
   1564   return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
   1565 }
   1566 
   1567 bool
   1568 ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
   1569                                             MachineBasicBlock &FMBB) const {
   1570   // Reduce false anti-dependencies to let Swift's out-of-order execution
   1571   // engine do its thing.
   1572   return Subtarget.isSwift();
   1573 }
   1574 
   1575 /// getInstrPredicate - If instruction is predicated, returns its predicate
   1576 /// condition, otherwise returns AL. It also returns the condition code
   1577 /// register by reference.
   1578 ARMCC::CondCodes
   1579 llvm::getInstrPredicate(const MachineInstr *MI, unsigned &PredReg) {
   1580   int PIdx = MI->findFirstPredOperandIdx();
   1581   if (PIdx == -1) {
   1582     PredReg = 0;
   1583     return ARMCC::AL;
   1584   }
   1585 
   1586   PredReg = MI->getOperand(PIdx+1).getReg();
   1587   return (ARMCC::CondCodes)MI->getOperand(PIdx).getImm();
   1588 }
   1589 
   1590 
   1591 int llvm::getMatchingCondBranchOpcode(int Opc) {
   1592   if (Opc == ARM::B)
   1593     return ARM::Bcc;
   1594   if (Opc == ARM::tB)
   1595     return ARM::tBcc;
   1596   if (Opc == ARM::t2B)
   1597     return ARM::t2Bcc;
   1598 
   1599   llvm_unreachable("Unknown unconditional branch opcode!");
   1600 }
   1601 
   1602 /// commuteInstruction - Handle commutable instructions.
   1603 MachineInstr *
   1604 ARMBaseInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
   1605   switch (MI->getOpcode()) {
   1606   case ARM::MOVCCr:
   1607   case ARM::t2MOVCCr: {
   1608     // MOVCC can be commuted by inverting the condition.
   1609     unsigned PredReg = 0;
   1610     ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
   1611     // MOVCC AL can't be inverted. Shouldn't happen.
   1612     if (CC == ARMCC::AL || PredReg != ARM::CPSR)
   1613       return NULL;
   1614     MI = TargetInstrInfo::commuteInstruction(MI, NewMI);
   1615     if (!MI)
   1616       return NULL;
   1617     // After swapping the MOVCC operands, also invert the condition.
   1618     MI->getOperand(MI->findFirstPredOperandIdx())
   1619       .setImm(ARMCC::getOppositeCondition(CC));
   1620     return MI;
   1621   }
   1622   }
   1623   return TargetInstrInfo::commuteInstruction(MI, NewMI);
   1624 }
   1625 
   1626 /// Identify instructions that can be folded into a MOVCC instruction, and
   1627 /// return the defining instruction.
   1628 static MachineInstr *canFoldIntoMOVCC(unsigned Reg,
   1629                                       const MachineRegisterInfo &MRI,
   1630                                       const TargetInstrInfo *TII) {
   1631   if (!TargetRegisterInfo::isVirtualRegister(Reg))
   1632     return 0;
   1633   if (!MRI.hasOneNonDBGUse(Reg))
   1634     return 0;
   1635   MachineInstr *MI = MRI.getVRegDef(Reg);
   1636   if (!MI)
   1637     return 0;
   1638   // MI is folded into the MOVCC by predicating it.
   1639   if (!MI->isPredicable())
   1640     return 0;
   1641   // Check if MI has any non-dead defs or physreg uses. This also detects
   1642   // predicated instructions which will be reading CPSR.
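          // Operand 0 is MI's own def of Reg, so start scanning at operand 1.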
   1643   for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
   1644     const MachineOperand &MO = MI->getOperand(i);
   1645     // Reject frame index operands; PEI can't handle the predicated pseudos.
   1646     if (MO.isFI() || MO.isCPI() || MO.isJTI())
   1647       return 0;
   1648     if (!MO.isReg())
   1649       continue;
   1650     // MI can't have any tied operands; that would conflict with predication.
   1651     if (MO.isTied())
   1652       return 0;
   1653     if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
   1654       return 0;
   1655     if (MO.isDef() && !MO.isDead())
   1656       return 0;
   1657   }
   1658   bool DontMoveAcrossStores = true;
   1659   if (!MI->isSafeToMove(TII, /* AliasAnalysis = */ 0, DontMoveAcrossStores))
   1660     return 0;
   1661   return MI;
   1662 }
   1663 
   1664 bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr *MI,
   1665                                      SmallVectorImpl<MachineOperand> &Cond,
   1666                                      unsigned &TrueOp, unsigned &FalseOp,
   1667                                      bool &Optimizable) const {
   1668   assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
   1669          "Unknown select instruction");
   1670   // MOVCC operands:
   1671   // 0: Def.
   1672   // 1: True use.
   1673   // 2: False use.
   1674   // 3: Condition code.
   1675   // 4: CPSR use.
   1676   TrueOp = 1;
   1677   FalseOp = 2;
   1678   Cond.push_back(MI->getOperand(3));
   1679   Cond.push_back(MI->getOperand(4));
   1680   // We can always fold a def.
   1681   Optimizable = true;
   1682   return false;
   1683 }
   1684 
   1685 MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI,
   1686                                                bool PreferFalse) const {
   1687   assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) &&
   1688          "Unknown select instruction");
   1689   const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
   1690   MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this);
   1691   bool Invert = !DefMI;
   1692   if (!DefMI)
   1693     DefMI = canFoldIntoMOVCC(MI->getOperand(1).getReg(), MRI, this);
   1694   if (!DefMI)
   1695     return 0;
   1696 
   1697   // Create a new predicated version of DefMI.
   1698   // Rfalse is the first use.
   1699   MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
   1700                                       DefMI->getDesc(),
   1701                                       MI->getOperand(0).getReg());
   1702 
   1703   // Copy all the DefMI operands, excluding its (null) predicate.
   1704   const MCInstrDesc &DefDesc = DefMI->getDesc();
   1705   for (unsigned i = 1, e = DefDesc.getNumOperands();
   1706        i != e && !DefDesc.OpInfo[i].isPredicate(); ++i)
   1707     NewMI.addOperand(DefMI->getOperand(i));
   1708 
   1709   unsigned CondCode = MI->getOperand(3).getImm();
   1710   if (Invert)
   1711     NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));
   1712   else
   1713     NewMI.addImm(CondCode);
   1714   NewMI.addOperand(MI->getOperand(4));
   1715 
   1716   // DefMI is not the -S version that sets CPSR, so add an optional %noreg.
   1717   if (NewMI->hasOptionalDef())
   1718     AddDefaultCC(NewMI);
   1719 
   1720   // The output register value when the predicate is false is an implicit
   1721   // register operand tied to the first def.
   1722   // The tie makes the register allocator ensure the FalseReg is allocated the
   1723   // same register as operand 0.
   1724   MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1);
   1725   FalseReg.setImplicit();
   1726   NewMI.addOperand(FalseReg);
   1727   NewMI->tieOperands(0, NewMI->getNumOperands() - 1);
   1728 
   1729   // The caller will erase MI, but not DefMI.
   1730   DefMI->eraseFromParent();
   1731   return NewMI;
   1732 }
   1733 
   1734 /// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether the
   1735 /// instruction is encoded with an 'S' bit is determined by the optional CPSR
   1736 /// def operand.
   1737 ///
   1738 /// This will go away once we can teach tblgen how to set the optional CPSR def
   1739 /// operand itself.
   1740 struct AddSubFlagsOpcodePair {
   1741   uint16_t PseudoOpc;
   1742   uint16_t MachineOpc;
   1743 };
   1744 
   1745 static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
   1746   {ARM::ADDSri, ARM::ADDri},
   1747   {ARM::ADDSrr, ARM::ADDrr},
   1748   {ARM::ADDSrsi, ARM::ADDrsi},
   1749   {ARM::ADDSrsr, ARM::ADDrsr},
   1750 
   1751   {ARM::SUBSri, ARM::SUBri},
   1752   {ARM::SUBSrr, ARM::SUBrr},
   1753   {ARM::SUBSrsi, ARM::SUBrsi},
   1754   {ARM::SUBSrsr, ARM::SUBrsr},
   1755 
   1756   {ARM::RSBSri, ARM::RSBri},
   1757   {ARM::RSBSrsi, ARM::RSBrsi},
   1758   {ARM::RSBSrsr, ARM::RSBrsr},
   1759 
   1760   {ARM::t2ADDSri, ARM::t2ADDri},
   1761   {ARM::t2ADDSrr, ARM::t2ADDrr},
   1762   {ARM::t2ADDSrs, ARM::t2ADDrs},
   1763 
   1764   {ARM::t2SUBSri, ARM::t2SUBri},
   1765   {ARM::t2SUBSrr, ARM::t2SUBrr},
   1766   {ARM::t2SUBSrs, ARM::t2SUBrs},
   1767 
   1768   {ARM::t2RSBSri, ARM::t2RSBri},
   1769   {ARM::t2RSBSrs, ARM::t2RSBrs},
   1770 };
   1771 
   1772 unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
   1773   for (unsigned i = 0, e = array_lengthof(AddSubFlagsOpcodeMap); i != e; ++i)
   1774     if (OldOpc == AddSubFlagsOpcodeMap[i].PseudoOpc)
   1775       return AddSubFlagsOpcodeMap[i].MachineOpc;
   1776   return 0;
   1777 }
   1778 
   1779 void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
   1780                                MachineBasicBlock::iterator &MBBI, DebugLoc dl,
   1781                                unsigned DestReg, unsigned BaseReg, int NumBytes,
   1782                                ARMCC::CondCodes Pred, unsigned PredReg,
   1783                                const ARMBaseInstrInfo &TII, unsigned MIFlags) {
   1784   bool isSub = NumBytes < 0;
   1785   if (isSub) NumBytes = -NumBytes;
   1786 
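          // Split the immediate into valid rotated-immediate chunks and emit one
          // ADD / SUB per chunk. E.g. NumBytes == 0x12340 is not a valid so_imm,
          // so it becomes two adds: #0x2340 followed by #0x10000.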
   1787   while (NumBytes) {
   1788     unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
   1789     unsigned ThisVal = NumBytes & ARM_AM::rotr32(0xFF, RotAmt);
   1790     assert(ThisVal && "Didn't extract field correctly");
   1791 
   1792     // We will handle these bits from offset, clear them.
   1793     NumBytes &= ~ThisVal;
   1794 
   1795     assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");
   1796 
   1797     // Build the new ADD / SUB.
   1798     unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
   1799     BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
   1800       .addReg(BaseReg, RegState::Kill).addImm(ThisVal)
   1801       .addImm((unsigned)Pred).addReg(PredReg).addReg(0)
   1802       .setMIFlags(MIFlags);
   1803     BaseReg = DestReg;
   1804   }
   1805 }
   1806 
   1807 bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   1808                                 unsigned FrameReg, int &Offset,
   1809                                 const ARMBaseInstrInfo &TII) {
   1810   unsigned Opcode = MI.getOpcode();
   1811   const MCInstrDesc &Desc = MI.getDesc();
   1812   unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
   1813   bool isSub = false;
   1814 
   1815   // Memory operands in inline assembly always use AddrMode2.
   1816   if (Opcode == ARM::INLINEASM)
   1817     AddrMode = ARMII::AddrMode2;
   1818 
   1819   if (Opcode == ARM::ADDri) {
   1820     Offset += MI.getOperand(FrameRegIdx+1).getImm();
   1821     if (Offset == 0) {
   1822       // Turn it into a move.
   1823       MI.setDesc(TII.get(ARM::MOVr));
   1824       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
   1825       MI.RemoveOperand(FrameRegIdx+1);
   1826       Offset = 0;
   1827       return true;
   1828     } else if (Offset < 0) {
   1829       Offset = -Offset;
   1830       isSub = true;
   1831       MI.setDesc(TII.get(ARM::SUBri));
   1832     }
   1833 
   1834     // Common case: small offset, fits into instruction.
   1835     if (ARM_AM::getSOImmVal(Offset) != -1) {
   1836       // Replace the FrameIndex with sp / fp
   1837       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
   1838       MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
   1839       Offset = 0;
   1840       return true;
   1841     }
   1842 
   1843     // Otherwise, pull as much of the immediate into this ADDri/SUBri
   1844     // as possible.
   1845     unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
   1846     unsigned ThisImmVal = Offset & ARM_AM::rotr32(0xFF, RotAmt);
   1847 
   1848     // We will handle these bits from offset, clear them.
   1849     Offset &= ~ThisImmVal;
   1850 
   1851     // Get the properly encoded SOImmVal field.
   1852     assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
   1853            "Bit extraction didn't work?");
   1854     MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
   1855   } else {
   1856     unsigned ImmIdx = 0;
   1857     int InstrOffs = 0;
   1858     unsigned NumBits = 0;
   1859     unsigned Scale = 1;
   1860     switch (AddrMode) {
   1861     case ARMII::AddrMode_i12: {
   1862       ImmIdx = FrameRegIdx + 1;
   1863       InstrOffs = MI.getOperand(ImmIdx).getImm();
   1864       NumBits = 12;
   1865       break;
   1866     }
   1867     case ARMII::AddrMode2: {
   1868       ImmIdx = FrameRegIdx+2;
   1869       InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
   1870       if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
   1871         InstrOffs *= -1;
   1872       NumBits = 12;
   1873       break;
   1874     }
   1875     case ARMII::AddrMode3: {
   1876       ImmIdx = FrameRegIdx+2;
   1877       InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
   1878       if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
   1879         InstrOffs *= -1;
   1880       NumBits = 8;
   1881       break;
   1882     }
   1883     case ARMII::AddrMode4:
   1884     case ARMII::AddrMode6:
   1885       // Can't fold any offset even if it's zero.
   1886       return false;
   1887     case ARMII::AddrMode5: {
   1888       ImmIdx = FrameRegIdx+1;
   1889       InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
   1890       if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
   1891         InstrOffs *= -1;
   1892       NumBits = 8;
   1893       Scale = 4;
   1894       break;
   1895     }
   1896     default:
   1897       llvm_unreachable("Unsupported addressing mode!");
   1898     }
   1899 
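            // E.g. AddrMode5 (VLDRD / VLDRS) has an 8-bit offset counted in
            // words, so a byte offset of 1020 is encoded as 255 with Scale == 4.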
   1900     Offset += InstrOffs * Scale;
   1901     assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
   1902     if (Offset < 0) {
   1903       Offset = -Offset;
   1904       isSub = true;
   1905     }
   1906 
   1907     // Attempt to fold the address computation if the opcode has offset bits.
   1908     if (NumBits > 0) {
   1909       // Common case: small offset, fits into instruction.
   1910       MachineOperand &ImmOp = MI.getOperand(ImmIdx);
   1911       int ImmedOffset = Offset / Scale;
   1912       unsigned Mask = (1 << NumBits) - 1;
   1913       if ((unsigned)Offset <= Mask * Scale) {
   1914         // Replace the FrameIndex with sp
   1915         MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
   1916         // FIXME: When addrmode2 goes away, this will simplify (like the
   1917         // T2 version), as the LDR.i12 versions don't need the encoding
   1918         // tricks for the offset value.
   1919         if (isSub) {
   1920           if (AddrMode == ARMII::AddrMode_i12)
   1921             ImmedOffset = -ImmedOffset;
   1922           else
   1923             ImmedOffset |= 1 << NumBits;
   1924         }
   1925         ImmOp.ChangeToImmediate(ImmedOffset);
   1926         Offset = 0;
   1927         return true;
   1928       }
   1929 
   1930       // Otherwise, it didn't fit. Pull in what we can to simplify the immediate.
   1931       ImmedOffset = ImmedOffset & Mask;
   1932       if (isSub) {
   1933         if (AddrMode == ARMII::AddrMode_i12)
   1934           ImmedOffset = -ImmedOffset;
   1935         else
   1936           ImmedOffset |= 1 << NumBits;
   1937       }
   1938       ImmOp.ChangeToImmediate(ImmedOffset);
   1939       Offset &= ~(Mask*Scale);
   1940     }
   1941   }
   1942 
   1943   Offset = (isSub) ? -Offset : Offset;
   1944   return Offset == 0;
   1945 }
   1946 
   1947 /// analyzeCompare - For a comparison instruction, return the source registers
   1948 /// in SrcReg and SrcReg2 if it has two register operands, and the value it
   1949 /// compares against in CmpValue. Return true if the comparison instruction
   1950 /// can be analyzed.
   1951 bool ARMBaseInstrInfo::
   1952 analyzeCompare(const MachineInstr *MI, unsigned &SrcReg, unsigned &SrcReg2,
   1953                int &CmpMask, int &CmpValue) const {
   1954   switch (MI->getOpcode()) {
   1955   default: break;
   1956   case ARM::CMPri:
   1957   case ARM::t2CMPri:
   1958     SrcReg = MI->getOperand(0).getReg();
   1959     SrcReg2 = 0;
   1960     CmpMask = ~0;
   1961     CmpValue = MI->getOperand(1).getImm();
   1962     return true;
   1963   case ARM::CMPrr:
   1964   case ARM::t2CMPrr:
   1965     SrcReg = MI->getOperand(0).getReg();
   1966     SrcReg2 = MI->getOperand(1).getReg();
   1967     CmpMask = ~0;
   1968     CmpValue = 0;
   1969     return true;
   1970   case ARM::TSTri:
   1971   case ARM::t2TSTri:
   1972     SrcReg = MI->getOperand(0).getReg();
   1973     SrcReg2 = 0;
   1974     CmpMask = MI->getOperand(1).getImm();
   1975     CmpValue = 0;
   1976     return true;
   1977   }
   1978 
   1979   return false;
   1980 }
   1981 
   1982 /// isSuitableForMask - Identify a suitable 'and' instruction that
   1983 /// operates on the given source register and applies the same mask
   1984 /// as a 'tst' instruction. Provide a limited look-through for copies.
   1985 /// When successful, MI will hold the found instruction.
   1986 static bool isSuitableForMask(MachineInstr *&MI, unsigned SrcReg,
   1987                               int CmpMask, bool CommonUse) {
   1988   switch (MI->getOpcode()) {
   1989     case ARM::ANDri:
   1990     case ARM::t2ANDri:
   1991       if (CmpMask != MI->getOperand(2).getImm())
   1992         return false;
   1993       if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
   1994         return true;
   1995       break;
   1996     case ARM::COPY: {
   1997       // Walk down one instruction which is potentially an 'and'.
   1998       const MachineInstr &Copy = *MI;
   1999       MachineBasicBlock::iterator AND(
   2000         llvm::next(MachineBasicBlock::iterator(MI)));
   2001       if (AND == MI->getParent()->end()) return false;
   2002       MI = AND;
   2003       return isSuitableForMask(MI, Copy.getOperand(0).getReg(),
   2004                                CmpMask, true);
   2005     }
   2006   }
   2007 
   2008   return false;
   2009 }
   2010 
   2011 /// getSwappedCondition - Assuming the flags are set by MI(a,b), return
   2012 /// the condition code to use if we modify the instructions such that the
   2013 /// flags are instead set by MI(b,a).
   2014 inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
   2015   switch (CC) {
   2016   default: return ARMCC::AL;
   2017   case ARMCC::EQ: return ARMCC::EQ;
   2018   case ARMCC::NE: return ARMCC::NE;
   2019   case ARMCC::HS: return ARMCC::LS;
   2020   case ARMCC::LO: return ARMCC::HI;
   2021   case ARMCC::HI: return ARMCC::LO;
   2022   case ARMCC::LS: return ARMCC::HS;
   2023   case ARMCC::GE: return ARMCC::LE;
   2024   case ARMCC::LT: return ARMCC::GT;
   2025   case ARMCC::GT: return ARMCC::LT;
   2026   case ARMCC::LE: return ARMCC::GE;
   2027   }
   2028 }
   2029 
   2030 /// isRedundantFlagInstr - check whether the first instruction, whose only
   2031 /// purpose is to update flags, can be made redundant.
   2032 /// CMPrr can be made redundant by SUBrr if the operands are the same.
   2033 /// CMPri can be made redundant by SUBri if the operands are the same.
   2034 /// This function can be extended later on.
   2035 inline static bool isRedundantFlagInstr(MachineInstr *CmpI, unsigned SrcReg,
   2036                                         unsigned SrcReg2, int ImmValue,
   2037                                         MachineInstr *OI) {
   2038   if ((CmpI->getOpcode() == ARM::CMPrr ||
   2039        CmpI->getOpcode() == ARM::t2CMPrr) &&
   2040       (OI->getOpcode() == ARM::SUBrr ||
   2041        OI->getOpcode() == ARM::t2SUBrr) &&
   2042       ((OI->getOperand(1).getReg() == SrcReg &&
   2043         OI->getOperand(2).getReg() == SrcReg2) ||
   2044        (OI->getOperand(1).getReg() == SrcReg2 &&
   2045         OI->getOperand(2).getReg() == SrcReg)))
   2046     return true;
   2047 
   2048   if ((CmpI->getOpcode() == ARM::CMPri ||
   2049        CmpI->getOpcode() == ARM::t2CMPri) &&
   2050       (OI->getOpcode() == ARM::SUBri ||
   2051        OI->getOpcode() == ARM::t2SUBri) &&
   2052       OI->getOperand(1).getReg() == SrcReg &&
   2053       OI->getOperand(2).getImm() == ImmValue)
   2054     return true;
   2055   return false;
   2056 }
   2057 
   2058 /// optimizeCompareInstr - Convert the instruction supplying the argument to the
   2059 /// comparison into one that sets the zero bit in the flags register;
   2060 /// remove a redundant Compare instruction if an earlier instruction can set the
   2061 /// flags in the same way as Compare.
   2062 /// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
   2063 /// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
   2064 /// condition code of instructions which use the flags.
   2065 bool ARMBaseInstrInfo::
   2066 optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2,
   2067                      int CmpMask, int CmpValue,
   2068                      const MachineRegisterInfo *MRI) const {
   2069   // Get the unique definition of SrcReg.
   2070   MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
   2071   if (!MI) return false;
   2072 
   2073   // Masked compares sometimes use the same register as the corresponding 'and'.
   2074   if (CmpMask != ~0) {
   2075     if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(MI)) {
   2076       MI = 0;
   2077       for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(SrcReg),
   2078            UE = MRI->use_end(); UI != UE; ++UI) {
   2079         if (UI->getParent() != CmpInstr->getParent()) continue;
   2080         MachineInstr *PotentialAND = &*UI;
   2081         if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
   2082             isPredicated(PotentialAND))
   2083           continue;
   2084         MI = PotentialAND;
   2085         break;
   2086       }
   2087       if (!MI) return false;
   2088     }
   2089   }
   2090 
   2091   // Get ready to iterate backward from CmpInstr.
   2092   MachineBasicBlock::iterator I = CmpInstr, E = MI,
   2093                               B = CmpInstr->getParent()->begin();
   2094 
   2095   // Early exit if CmpInstr is at the beginning of the BB.
   2096   if (I == B) return false;
   2097 
   2098   // There are two possible candidates which can be changed to set CPSR:
   2099   // One is MI, the other is a SUB instruction.
   2100   // For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).
   2101   // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
   2102   MachineInstr *Sub = NULL;
   2103   if (SrcReg2 != 0)
   2104     // MI is not a candidate for CMPrr.
   2105     MI = NULL;
   2106   else if (MI->getParent() != CmpInstr->getParent() || CmpValue != 0) {
   2107     // Conservatively refuse to convert an instruction which isn't in the same
   2108     // BB as the comparison.
   2109     // For CMPri, we need to check Sub, thus we can't return here.
   2110     if (CmpInstr->getOpcode() == ARM::CMPri ||
   2111        CmpInstr->getOpcode() == ARM::t2CMPri)
   2112       MI = NULL;
   2113     else
   2114       return false;
   2115   }
   2116 
   2117   // Check that CPSR isn't set between the comparison instruction and the one we
   2118   // want to change. At the same time, search for Sub.
   2119   const TargetRegisterInfo *TRI = &getRegisterInfo();
   2120   --I;
   2121   for (; I != E; --I) {
   2122     const MachineInstr &Instr = *I;
   2123 
   2124     if (Instr.modifiesRegister(ARM::CPSR, TRI) ||
   2125         Instr.readsRegister(ARM::CPSR, TRI))
   2126       // This instruction modifies or uses CPSR after the one we want to
   2127       // change. We can't do this transformation.
   2128       return false;
   2129 
   2130     // Check whether CmpInstr can be made redundant by the current instruction.
   2131     if (isRedundantFlagInstr(CmpInstr, SrcReg, SrcReg2, CmpValue, &*I)) {
   2132       Sub = &*I;
   2133       break;
   2134     }
   2135 
   2136     if (I == B)
   2137       // The 'and' is below the comparison instruction.
   2138       return false;
   2139   }
   2140 
   2141   // Return false if no candidates exist.
   2142   if (!MI && !Sub)
   2143     return false;
   2144 
   2145   // The single candidate is called MI.
   2146   if (!MI) MI = Sub;
   2147 
   2148   // We can't use a predicated instruction - it doesn't always write the flags.
   2149   if (isPredicated(MI))
   2150     return false;
   2151 
   2152   switch (MI->getOpcode()) {
   2153   default: break;
   2154   case ARM::RSBrr:
   2155   case ARM::RSBri:
   2156   case ARM::RSCrr:
   2157   case ARM::RSCri:
   2158   case ARM::ADDrr:
   2159   case ARM::ADDri:
   2160   case ARM::ADCrr:
   2161   case ARM::ADCri:
   2162   case ARM::SUBrr:
   2163   case ARM::SUBri:
   2164   case ARM::SBCrr:
   2165   case ARM::SBCri:
   2166   case ARM::t2RSBri:
   2167   case ARM::t2ADDrr:
   2168   case ARM::t2ADDri:
   2169   case ARM::t2ADCrr:
   2170   case ARM::t2ADCri:
   2171   case ARM::t2SUBrr:
   2172   case ARM::t2SUBri:
   2173   case ARM::t2SBCrr:
   2174   case ARM::t2SBCri:
   2175   case ARM::ANDrr:
   2176   case ARM::ANDri:
   2177   case ARM::t2ANDrr:
   2178   case ARM::t2ANDri:
   2179   case ARM::ORRrr:
   2180   case ARM::ORRri:
   2181   case ARM::t2ORRrr:
   2182   case ARM::t2ORRri:
   2183   case ARM::EORrr:
   2184   case ARM::EORri:
   2185   case ARM::t2EORrr:
   2186   case ARM::t2EORri: {
   2187     // Scan forward for uses of CPSR.
   2188     // When checking against MI: if a user's condition code requires checking
   2189     // of the V bit, then this is not safe to do.
   2190     // It is safe to remove CmpInstr if CPSR is redefined or killed.
   2191     // If we are done with the basic block, we need to check whether CPSR is
   2192     // live-out.
   2193     SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>
   2194         OperandsToUpdate;
   2195     bool isSafe = false;
   2196     I = CmpInstr;
   2197     E = CmpInstr->getParent()->end();
   2198     while (!isSafe && ++I != E) {
   2199       const MachineInstr &Instr = *I;
   2200       for (unsigned IO = 0, EO = Instr.getNumOperands();
   2201            !isSafe && IO != EO; ++IO) {
   2202         const MachineOperand &MO = Instr.getOperand(IO);
   2203         if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {
   2204           isSafe = true;
   2205           break;
   2206         }
   2207         if (!MO.isReg() || MO.getReg() != ARM::CPSR)
   2208           continue;
   2209         if (MO.isDef()) {
   2210           isSafe = true;
   2211           break;
   2212         }
   2213         // The condition code operand immediately precedes the CPSR use.
   2214         ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm();
   2215         if (Sub) {
   2216           ARMCC::CondCodes NewCC = getSwappedCondition(CC);
   2217           if (NewCC == ARMCC::AL)
   2218             return false;
   2219           // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
   2220           // on CMP needs to be updated to be based on SUB.
   2221           // Push the condition code operands to OperandsToUpdate.
   2222           // If it is safe to remove CmpInstr, the condition code of these
   2223           // operands will be modified.
   2224           if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
   2225               Sub->getOperand(2).getReg() == SrcReg)
   2226             OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)),
   2227                                                       NewCC));
   2228         }
   2229         else
   2230           switch (CC) {
   2231           default:
   2232             // CPSR can be used multiple times; we should continue.
   2233             break;
   2234           case ARMCC::VS:
   2235           case ARMCC::VC:
   2236           case ARMCC::GE:
   2237           case ARMCC::LT:
   2238           case ARMCC::GT:
   2239           case ARMCC::LE:
   2240             return false;
   2241           }
   2242       }
   2243     }
   2244 
   2245     // If CPSR is neither killed nor re-defined, we should check whether it is
   2246     // live-out. If it is live-out, do not optimize.
   2247     if (!isSafe) {
   2248       MachineBasicBlock *MBB = CmpInstr->getParent();
   2249       for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
   2250                SE = MBB->succ_end(); SI != SE; ++SI)
   2251         if ((*SI)->isLiveIn(ARM::CPSR))
   2252           return false;
   2253     }
   2254 
   2255     // Toggle the optional operand to CPSR.
   2256     MI->getOperand(5).setReg(ARM::CPSR);
   2257     MI->getOperand(5).setIsDef(true);
   2258     assert(!isPredicated(MI) && "Can't use flags from predicated instruction");
   2259     CmpInstr->eraseFromParent();
   2260 
   2261     // Modify the condition code of operands in OperandsToUpdate.
   2262     // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
   2263     // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
   2264     for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
   2265       OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);
   2266     return true;
   2267   }
   2268   }
   2269 
   2270   return false;
   2271 }
   2272 
   2273 bool ARMBaseInstrInfo::FoldImmediate(MachineInstr *UseMI,
   2274                                      MachineInstr *DefMI, unsigned Reg,
   2275                                      MachineRegisterInfo *MRI) const {
   2276   // Fold large immediates into add, sub, or, xor.
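          // E.g. if "%r = MOVi32imm 0x10004" feeds "ADDrr %d, %a, %r", the
          // immediate splits into two so_imm parts (#4 and #0x10000) and the
          // ADDrr is rewritten as a pair of ADDri instructions.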
   2277   unsigned DefOpc = DefMI->getOpcode();
   2278   if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm)
   2279     return false;
   2280   if (!DefMI->getOperand(1).isImm())
   2281     // Could be t2MOVi32imm <ga:xx>
   2282     return false;
   2283 
   2284   if (!MRI->hasOneNonDBGUse(Reg))
   2285     return false;
   2286 
   2287   const MCInstrDesc &DefMCID = DefMI->getDesc();
   2288   if (DefMCID.hasOptionalDef()) {
   2289     unsigned NumOps = DefMCID.getNumOperands();
   2290     const MachineOperand &MO = DefMI->getOperand(NumOps-1);
   2291     if (MO.getReg() == ARM::CPSR && !MO.isDead())
   2292       // If DefMI defines CPSR and it is not dead, it's obviously not safe
   2293       // to delete DefMI.
   2294       return false;
   2295   }
   2296 
   2297   const MCInstrDesc &UseMCID = UseMI->getDesc();
   2298   if (UseMCID.hasOptionalDef()) {
   2299     unsigned NumOps = UseMCID.getNumOperands();
   2300     if (UseMI->getOperand(NumOps-1).getReg() == ARM::CPSR)
   2301       // If the instruction sets the flag, do not attempt this optimization
   2302       // since it may change the semantics of the code.
   2303       return false;
   2304   }
   2305 
   2306   unsigned UseOpc = UseMI->getOpcode();
   2307   unsigned NewUseOpc = 0;
   2308   uint32_t ImmVal = (uint32_t)DefMI->getOperand(1).getImm();
   2309   uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
   2310   bool Commute = false;
   2311   switch (UseOpc) {
   2312   default: return false;
   2313   case ARM::SUBrr:
   2314   case ARM::ADDrr:
   2315   case ARM::ORRrr:
   2316   case ARM::EORrr:
   2317   case ARM::t2SUBrr:
   2318   case ARM::t2ADDrr:
   2319   case ARM::t2ORRrr:
   2320   case ARM::t2EORrr: {
   2321     Commute = UseMI->getOperand(2).getReg() != Reg;
   2322     switch (UseOpc) {
   2323     default: break;
   2324     case ARM::SUBrr: {
   2325       if (Commute)
   2326         return false;
   2327       ImmVal = -ImmVal;
   2328       NewUseOpc = ARM::SUBri;
   2329       // Fallthrough
   2330     }
   2331     case ARM::ADDrr:
   2332     case ARM::ORRrr:
   2333     case ARM::EORrr: {
   2334       if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
   2335         return false;
   2336       SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
   2337       SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
   2338       switch (UseOpc) {
   2339       default: break;
   2340       case ARM::ADDrr: NewUseOpc = ARM::ADDri; break;
   2341       case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
   2342       case ARM::EORrr: NewUseOpc = ARM::EORri; break;
   2343       }
   2344       break;
   2345     }
   2346     case ARM::t2SUBrr: {
   2347       if (Commute)
   2348         return false;
   2349       ImmVal = -ImmVal;
   2350       NewUseOpc = ARM::t2SUBri;
   2351       // Fallthrough
   2352     }
   2353     case ARM::t2ADDrr:
   2354     case ARM::t2ORRrr:
   2355     case ARM::t2EORrr: {
   2356       if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
   2357         return false;
   2358       SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
   2359       SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
   2360       switch (UseOpc) {
   2361       default: break;
   2362       case ARM::t2ADDrr: NewUseOpc = ARM::t2ADDri; break;
   2363       case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
   2364       case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
   2365       }
   2366       break;
   2367     }
   2368     }
   2369   }
   2370   }
   2371 
   2372   unsigned OpIdx = Commute ? 2 : 1;
   2373   unsigned Reg1 = UseMI->getOperand(OpIdx).getReg();
   2374   bool isKill = UseMI->getOperand(OpIdx).isKill();
   2375   unsigned NewReg = MRI->createVirtualRegister(MRI->getRegClass(Reg));
   2376   AddDefaultCC(AddDefaultPred(BuildMI(*UseMI->getParent(),
   2377                                       UseMI, UseMI->getDebugLoc(),
   2378                                       get(NewUseOpc), NewReg)
   2379                               .addReg(Reg1, getKillRegState(isKill))
   2380                               .addImm(SOImmValV1)));
   2381   UseMI->setDesc(get(NewUseOpc));
   2382   UseMI->getOperand(1).setReg(NewReg);
   2383   UseMI->getOperand(1).setIsKill();
   2384   UseMI->getOperand(2).ChangeToImmediate(SOImmValV2);
   2385   DefMI->eraseFromParent();
   2386   return true;
   2387 }
   2388 
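        // Determine the number of micro-ops a Swift core needs for the given
        // load / store. getNumMicroOps below uses this in place of the itinerary
        // data, since on Swift the count depends on the addressing mode.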
   2389 static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
   2390                                         const MachineInstr *MI) {
   2391   switch (MI->getOpcode()) {
   2392   default: {
   2393     const MCInstrDesc &Desc = MI->getDesc();
   2394     int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
   2395     assert(UOps >= 0 && "bad # UOps");
   2396     return UOps;
   2397   }
   2398 
   2399   case ARM::LDRrs:
   2400   case ARM::LDRBrs:
   2401   case ARM::STRrs:
   2402   case ARM::STRBrs: {
   2403     unsigned ShOpVal = MI->getOperand(3).getImm();
   2404     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
   2405     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
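            // Swift presumably handles an added register offset with no shift or
            // with LSL #1-3 in a single uOp; any other shift, or a subtracted
            // offset, costs an extra uOp.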
   2406     if (!isSub &&
   2407         (ShImm == 0 ||
   2408          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
   2409           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
   2410       return 1;
   2411     return 2;
   2412   }
   2413 
   2414   case ARM::LDRH:
   2415   case ARM::STRH: {
   2416     if (!MI->getOperand(2).getReg())
   2417       return 1;
   2418 
   2419     unsigned ShOpVal = MI->getOperand(3).getImm();
   2420     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
   2421     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   2422     if (!isSub &&
   2423         (ShImm == 0 ||
   2424          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
   2425           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
   2426       return 1;
   2427     return 2;
   2428   }
   2429 
   2430   case ARM::LDRSB:
   2431   case ARM::LDRSH:
   2432     return (ARM_AM::getAM3Op(MI->getOperand(3).getImm()) == ARM_AM::sub) ? 3:2;
   2433 
   2434   case ARM::LDRSB_POST:
   2435   case ARM::LDRSH_POST: {
   2436     unsigned Rt = MI->getOperand(0).getReg();
   2437     unsigned Rm = MI->getOperand(3).getReg();
   2438     return (Rt == Rm) ? 4 : 3;
   2439   }
   2440 
   2441   case ARM::LDR_PRE_REG:
   2442   case ARM::LDRB_PRE_REG: {
   2443     unsigned Rt = MI->getOperand(0).getReg();
   2444     unsigned Rm = MI->getOperand(3).getReg();
   2445     if (Rt == Rm)
   2446       return 3;
   2447     unsigned ShOpVal = MI->getOperand(4).getImm();
   2448     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
   2449     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   2450     if (!isSub &&
   2451         (ShImm == 0 ||
   2452          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
   2453           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
   2454       return 2;
   2455     return 3;
   2456   }
   2457 
   2458   case ARM::STR_PRE_REG:
   2459   case ARM::STRB_PRE_REG: {
   2460     unsigned ShOpVal = MI->getOperand(4).getImm();
   2461     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
   2462     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   2463     if (!isSub &&
   2464         (ShImm == 0 ||
   2465          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
   2466           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
   2467       return 2;
   2468     return 3;
   2469   }
   2470 
   2471   case ARM::LDRH_PRE:
   2472   case ARM::STRH_PRE: {
   2473     unsigned Rt = MI->getOperand(0).getReg();
   2474     unsigned Rm = MI->getOperand(3).getReg();
   2475     if (!Rm)
   2476       return 2;
   2477     if (Rt == Rm)
   2478       return 3;
   2479     return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub)
   2480       ? 3 : 2;
   2481   }
   2482 
   2483   case ARM::LDR_POST_REG:
   2484   case ARM::LDRB_POST_REG:
   2485   case ARM::LDRH_POST: {
   2486     unsigned Rt = MI->getOperand(0).getReg();
   2487     unsigned Rm = MI->getOperand(3).getReg();
   2488     return (Rt == Rm) ? 3 : 2;
   2489   }
   2490 
   2491   case ARM::LDR_PRE_IMM:
   2492   case ARM::LDRB_PRE_IMM:
   2493   case ARM::LDR_POST_IMM:
   2494   case ARM::LDRB_POST_IMM:
   2495   case ARM::STRB_POST_IMM:
   2496   case ARM::STRB_POST_REG:
   2497   case ARM::STRB_PRE_IMM:
   2498   case ARM::STRH_POST:
   2499   case ARM::STR_POST_IMM:
   2500   case ARM::STR_POST_REG:
   2501   case ARM::STR_PRE_IMM:
   2502     return 2;
   2503 
   2504   case ARM::LDRSB_PRE:
   2505   case ARM::LDRSH_PRE: {
   2506     unsigned Rm = MI->getOperand(3).getReg();
   2507     if (Rm == 0)
   2508       return 3;
   2509     unsigned Rt = MI->getOperand(0).getReg();
   2510     if (Rt == Rm)
   2511       return 4;
   2512     unsigned ShOpVal = MI->getOperand(4).getImm();
   2513     bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
   2514     unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   2515     if (!isSub &&
   2516         (ShImm == 0 ||
   2517          ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
   2518           ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
   2519       return 3;
   2520     return 4;
   2521   }
   2522 
   2523   case ARM::LDRD: {
   2524     unsigned Rt = MI->getOperand(0).getReg();
   2525     unsigned Rn = MI->getOperand(2).getReg();
   2526     unsigned Rm = MI->getOperand(3).getReg();
   2527     if (Rm)
   2528       return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
   2529     return (Rt == Rn) ? 3 : 2;
   2530   }
   2531 
   2532   case ARM::STRD: {
   2533     unsigned Rm = MI->getOperand(3).getReg();
   2534     if (Rm)
   2535       return (ARM_AM::getAM3Op(MI->getOperand(4).getImm()) == ARM_AM::sub) ?4:3;
   2536     return 2;
   2537   }
   2538 
   2539   case ARM::LDRD_POST:
   2540   case ARM::t2LDRD_POST:
   2541     return 3;
   2542 
   2543   case ARM::STRD_POST:
   2544   case ARM::t2STRD_POST:
   2545     return 4;
   2546 
   2547   case ARM::LDRD_PRE: {
   2548     unsigned Rt = MI->getOperand(0).getReg();
   2549     unsigned Rn = MI->getOperand(3).getReg();
   2550     unsigned Rm = MI->getOperand(4).getReg();
   2551     if (Rm)
   2552       return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
   2553     return (Rt == Rn) ? 4 : 3;
   2554   }
   2555 
   2556   case ARM::t2LDRD_PRE: {
   2557     unsigned Rt = MI->getOperand(0).getReg();
   2558     unsigned Rn = MI->getOperand(3).getReg();
   2559     return (Rt == Rn) ? 4 : 3;
   2560   }
   2561 
   2562   case ARM::STRD_PRE: {
   2563     unsigned Rm = MI->getOperand(4).getReg();
   2564     if (Rm)
   2565       return (ARM_AM::getAM3Op(MI->getOperand(5).getImm()) == ARM_AM::sub) ?5:4;
   2566     return 3;
   2567   }
   2568 
   2569   case ARM::t2STRD_PRE:
   2570     return 3;
   2571 
   2572   case ARM::t2LDR_POST:
   2573   case ARM::t2LDRB_POST:
   2574   case ARM::t2LDRB_PRE:
   2575   case ARM::t2LDRSBi12:
   2576   case ARM::t2LDRSBi8:
   2577   case ARM::t2LDRSBpci:
   2578   case ARM::t2LDRSBs:
   2579   case ARM::t2LDRH_POST:
   2580   case ARM::t2LDRH_PRE:
   2581   case ARM::t2LDRSBT:
   2582   case ARM::t2LDRSB_POST:
   2583   case ARM::t2LDRSB_PRE:
   2584   case ARM::t2LDRSH_POST:
   2585   case ARM::t2LDRSH_PRE:
   2586   case ARM::t2LDRSHi12:
   2587   case ARM::t2LDRSHi8:
   2588   case ARM::t2LDRSHpci:
   2589   case ARM::t2LDRSHs:
   2590     return 2;
   2591 
   2592   case ARM::t2LDRDi8: {
   2593     unsigned Rt = MI->getOperand(0).getReg();
   2594     unsigned Rn = MI->getOperand(2).getReg();
   2595     return (Rt == Rn) ? 3 : 2;
   2596   }
   2597 
   2598   case ARM::t2STRB_POST:
   2599   case ARM::t2STRB_PRE:
   2600   case ARM::t2STRBs:
   2601   case ARM::t2STRDi8:
   2602   case ARM::t2STRH_POST:
   2603   case ARM::t2STRH_PRE:
   2604   case ARM::t2STRHs:
   2605   case ARM::t2STR_POST:
   2606   case ARM::t2STR_PRE:
   2607   case ARM::t2STRs:
   2608     return 2;
   2609   }
   2610 }
   2611 
   2612 // Return the number of 32-bit words loaded by LDM or stored by STM. If this
   2613 // can't be easily determined return 0 (missing MachineMemOperand).
   2614 //
   2615 // FIXME: The current MachineInstr design does not support relying on machine
   2616 // mem operands to determine the width of a memory access. Instead, we expect
   2617 // the target to provide this information based on the instruction opcode and
   2618 // operands. However, using MachineMemOperand is currently the best solution,
   2619 // for two reasons:
   2620 //
   2621 // 1) getNumMicroOps tries to infer LDM memory width from the total number of MI
   2622 // operands. This is much more dangerous than using the MachineMemOperand
   2623 // sizes because CodeGen passes can insert/remove optional machine operands. In
   2624 // fact, it's totally incorrect for preRA passes and appears to be wrong for
   2625 // postRA passes as well.
   2626 //
   2627 // 2) getNumLDMAddresses is only used by the scheduling machine model and any
   2628 // machine model that calls this should handle the unknown (zero size) case.
   2629 //
   2630 // Long term, we should require a target hook that verifies MachineMemOperand
   2631 // sizes during MC lowering. That target hook should be local to MC lowering
   2632 // because we can't ensure that it is aware of other MI forms. Doing this will
   2633 // ensure that MachineMemOperands are correctly propagated through all passes.
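         // For example, an LDM whose only MachineMemOperand covers 16 bytes reports
         // four addresses, while an LDM carrying no mem operands reports zero.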
   2634 unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr *MI) const {
   2635   unsigned Size = 0;
   2636   for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
   2637          E = MI->memoperands_end(); I != E; ++I) {
   2638     Size += (*I)->getSize();
   2639   }
   2640   return Size / 4;
   2641 }
   2642 
   2643 unsigned
   2644 ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
   2645                                  const MachineInstr *MI) const {
   2646   if (!ItinData || ItinData->isEmpty())
   2647     return 1;
   2648 
   2649   const MCInstrDesc &Desc = MI->getDesc();
   2650   unsigned Class = Desc.getSchedClass();
   2651   int ItinUOps = ItinData->getNumMicroOps(Class);
   2652   if (ItinUOps >= 0) {
   2653     if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
   2654       return getNumMicroOpsSwiftLdSt(ItinData, MI);
   2655 
   2656     return ItinUOps;
   2657   }
   2658 
   2659   unsigned Opc = MI->getOpcode();
   2660   switch (Opc) {
   2661   default:
   2662     llvm_unreachable("Unexpected multi-uops instruction!");
   2663   case ARM::VLDMQIA:
   2664   case ARM::VSTMQIA:
   2665     return 2;
   2666 
    2667   // The number of uOps for load / store multiple is determined by the number
    2668   // of registers.
   2669   //
   2670   // On Cortex-A8, each pair of register loads / stores can be scheduled on the
   2671   // same cycle. The scheduling for the first load / store must be done
   2672   // separately by assuming the address is not 64-bit aligned.
   2673   //
   2674   // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
   2675   // is not 64-bit aligned, then AGU would take an extra cycle.  For VFP / NEON
   2676   // load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.
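           // For example, under the VFP / NEON formula an 8-register VLDM takes
           // 8/2 + 0 + 1 = 5 uOps and a 5-register one takes 5/2 + 1 + 1 = 4 uOps.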
   2677   case ARM::VLDMDIA:
   2678   case ARM::VLDMDIA_UPD:
   2679   case ARM::VLDMDDB_UPD:
   2680   case ARM::VLDMSIA:
   2681   case ARM::VLDMSIA_UPD:
   2682   case ARM::VLDMSDB_UPD:
   2683   case ARM::VSTMDIA:
   2684   case ARM::VSTMDIA_UPD:
   2685   case ARM::VSTMDDB_UPD:
   2686   case ARM::VSTMSIA:
   2687   case ARM::VSTMSIA_UPD:
   2688   case ARM::VSTMSDB_UPD: {
   2689     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
   2690     return (NumRegs / 2) + (NumRegs % 2) + 1;
   2691   }
   2692 
   2693   case ARM::LDMIA_RET:
   2694   case ARM::LDMIA:
   2695   case ARM::LDMDA:
   2696   case ARM::LDMDB:
   2697   case ARM::LDMIB:
   2698   case ARM::LDMIA_UPD:
   2699   case ARM::LDMDA_UPD:
   2700   case ARM::LDMDB_UPD:
   2701   case ARM::LDMIB_UPD:
   2702   case ARM::STMIA:
   2703   case ARM::STMDA:
   2704   case ARM::STMDB:
   2705   case ARM::STMIB:
   2706   case ARM::STMIA_UPD:
   2707   case ARM::STMDA_UPD:
   2708   case ARM::STMDB_UPD:
   2709   case ARM::STMIB_UPD:
   2710   case ARM::tLDMIA:
   2711   case ARM::tLDMIA_UPD:
   2712   case ARM::tSTMIA_UPD:
   2713   case ARM::tPOP_RET:
   2714   case ARM::tPOP:
   2715   case ARM::tPUSH:
   2716   case ARM::t2LDMIA_RET:
   2717   case ARM::t2LDMIA:
   2718   case ARM::t2LDMDB:
   2719   case ARM::t2LDMIA_UPD:
   2720   case ARM::t2LDMDB_UPD:
   2721   case ARM::t2STMIA:
   2722   case ARM::t2STMDB:
   2723   case ARM::t2STMIA_UPD:
   2724   case ARM::t2STMDB_UPD: {
   2725     unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
   2726     if (Subtarget.isSwift()) {
   2727       int UOps = 1 + NumRegs;  // One for address computation, one for each ld / st.
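               // For example, a 3-register LDMIA_UPD costs 1 + 3 + 1 = 5 uOps here:
               // the address computation, three loads, and the base writeback.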
   2728       switch (Opc) {
   2729       default: break;
   2730       case ARM::VLDMDIA_UPD:
   2731       case ARM::VLDMDDB_UPD:
   2732       case ARM::VLDMSIA_UPD:
   2733       case ARM::VLDMSDB_UPD:
   2734       case ARM::VSTMDIA_UPD:
   2735       case ARM::VSTMDDB_UPD:
   2736       case ARM::VSTMSIA_UPD:
   2737       case ARM::VSTMSDB_UPD:
   2738       case ARM::LDMIA_UPD:
   2739       case ARM::LDMDA_UPD:
   2740       case ARM::LDMDB_UPD:
   2741       case ARM::LDMIB_UPD:
   2742       case ARM::STMIA_UPD:
   2743       case ARM::STMDA_UPD:
   2744       case ARM::STMDB_UPD:
   2745       case ARM::STMIB_UPD:
   2746       case ARM::tLDMIA_UPD:
   2747       case ARM::tSTMIA_UPD:
   2748       case ARM::t2LDMIA_UPD:
   2749       case ARM::t2LDMDB_UPD:
   2750       case ARM::t2STMIA_UPD:
   2751       case ARM::t2STMDB_UPD:
   2752         ++UOps; // One for base register writeback.
   2753         break;
   2754       case ARM::LDMIA_RET:
   2755       case ARM::tPOP_RET:
   2756       case ARM::t2LDMIA_RET:
   2757         UOps += 2; // One for base reg wb, one for write to pc.
   2758         break;
   2759       }
   2760       return UOps;
   2761     } else if (Subtarget.isCortexA8()) {
   2762       if (NumRegs < 4)
   2763         return 2;
   2764       // 4 registers would be issued: 2, 2.
   2765       // 5 registers would be issued: 2, 2, 1.
   2766       int A8UOps = (NumRegs / 2);
   2767       if (NumRegs % 2)
   2768         ++A8UOps;
   2769       return A8UOps;
    2770     } else if (Subtarget.isLikeA9()) { // Swift was handled above.
   2771       int A9UOps = (NumRegs / 2);
    2772       // If there is an odd number of registers or the address is not 64-bit
    2773       // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
   2774       if ((NumRegs % 2) ||
   2775           !MI->hasOneMemOperand() ||
   2776           (*MI->memoperands_begin())->getAlignment() < 8)
   2777         ++A9UOps;
   2778       return A9UOps;
   2779     } else {
   2780       // Assume the worst.
   2781       return NumRegs;
   2782     }
   2783   }
   2784   }
   2785 }
   2786 
   2787 int
   2788 ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
   2789                                   const MCInstrDesc &DefMCID,
   2790                                   unsigned DefClass,
   2791                                   unsigned DefIdx, unsigned DefAlign) const {
   2792   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
   2793   if (RegNo <= 0)
   2794     // Def is the address writeback.
   2795     return ItinData->getOperandCycle(DefClass, DefIdx);
   2796 
   2797   int DefCycle;
   2798   if (Subtarget.isCortexA8()) {
   2799     // (regno / 2) + (regno % 2) + 1
   2800     DefCycle = RegNo / 2 + 1;
   2801     if (RegNo % 2)
   2802       ++DefCycle;
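             // e.g. RegNo == 3: 1 + 1 + 1 == 3 cycles; RegNo == 4: 2 + 0 + 1 == 3.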
   2803   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
   2804     DefCycle = RegNo;
   2805     bool isSLoad = false;
   2806 
   2807     switch (DefMCID.getOpcode()) {
   2808     default: break;
   2809     case ARM::VLDMSIA:
   2810     case ARM::VLDMSIA_UPD:
   2811     case ARM::VLDMSDB_UPD:
   2812       isSLoad = true;
   2813       break;
   2814     }
   2815 
    2816     // If there is an odd number of 'S' registers or the address is not 64-bit
    2817     // aligned, then it takes an extra cycle.
   2818     if ((isSLoad && (RegNo % 2)) || DefAlign < 8)
   2819       ++DefCycle;
   2820   } else {
   2821     // Assume the worst.
   2822     DefCycle = RegNo + 2;
   2823   }
   2824 
   2825   return DefCycle;
   2826 }
   2827 
   2828 int
   2829 ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
   2830                                  const MCInstrDesc &DefMCID,
   2831                                  unsigned DefClass,
   2832                                  unsigned DefIdx, unsigned DefAlign) const {
   2833   int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
   2834   if (RegNo <= 0)
   2835     // Def is the address writeback.
   2836     return ItinData->getOperandCycle(DefClass, DefIdx);
   2837 
   2838   int DefCycle;
   2839   if (Subtarget.isCortexA8()) {
   2840     // 4 registers would be issued: 1, 2, 1.
   2841     // 5 registers would be issued: 1, 2, 2.
   2842     DefCycle = RegNo / 2;
   2843     if (DefCycle < 1)
   2844       DefCycle = 1;
   2845     // Result latency is issue cycle + 2: E2.
   2846     DefCycle += 2;
   2847   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
   2848     DefCycle = (RegNo / 2);
   2849     // If there are odd number of registers or if it's not 64-bit aligned,
    2850     // If there is an odd number of registers or the address is not 64-bit
    2851     // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
   2852       ++DefCycle;
   2853     // Result latency is AGU cycles + 2.
   2854     DefCycle += 2;
   2855   } else {
   2856     // Assume the worst.
   2857     DefCycle = RegNo + 2;
   2858   }
   2859 
   2860   return DefCycle;
   2861 }
   2862 
   2863 int
   2864 ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
   2865                                   const MCInstrDesc &UseMCID,
   2866                                   unsigned UseClass,
   2867                                   unsigned UseIdx, unsigned UseAlign) const {
   2868   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
   2869   if (RegNo <= 0)
   2870     return ItinData->getOperandCycle(UseClass, UseIdx);
   2871 
   2872   int UseCycle;
   2873   if (Subtarget.isCortexA8()) {
   2874     // (regno / 2) + (regno % 2) + 1
   2875     UseCycle = RegNo / 2 + 1;
   2876     if (RegNo % 2)
   2877       ++UseCycle;
   2878   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
   2879     UseCycle = RegNo;
   2880     bool isSStore = false;
   2881 
   2882     switch (UseMCID.getOpcode()) {
   2883     default: break;
   2884     case ARM::VSTMSIA:
   2885     case ARM::VSTMSIA_UPD:
   2886     case ARM::VSTMSDB_UPD:
   2887       isSStore = true;
   2888       break;
   2889     }
   2890 
   2891     // If there are odd number of 'S' registers or if it's not 64-bit aligned,
    2892     // If there is an odd number of 'S' registers or the address is not 64-bit
    2893     // aligned, then it takes an extra cycle.
   2894       ++UseCycle;
   2895   } else {
   2896     // Assume the worst.
   2897     UseCycle = RegNo + 2;
   2898   }
   2899 
   2900   return UseCycle;
   2901 }
   2902 
   2903 int
   2904 ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
   2905                                  const MCInstrDesc &UseMCID,
   2906                                  unsigned UseClass,
   2907                                  unsigned UseIdx, unsigned UseAlign) const {
   2908   int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
   2909   if (RegNo <= 0)
   2910     return ItinData->getOperandCycle(UseClass, UseIdx);
   2911 
   2912   int UseCycle;
   2913   if (Subtarget.isCortexA8()) {
   2914     UseCycle = RegNo / 2;
   2915     if (UseCycle < 2)
   2916       UseCycle = 2;
   2917     // Read in E3.
   2918     UseCycle += 2;
   2919   } else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {
   2920     UseCycle = (RegNo / 2);
    2921     // If there is an odd number of registers or the address is not 64-bit
    2922     // aligned, then it takes an extra AGU (Address Generation Unit) cycle.
   2923     if ((RegNo % 2) || UseAlign < 8)
   2924       ++UseCycle;
   2925   } else {
   2926     // Assume the worst.
   2927     UseCycle = 1;
   2928   }
   2929   return UseCycle;
   2930 }
   2931 
   2932 int
   2933 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   2934                                     const MCInstrDesc &DefMCID,
   2935                                     unsigned DefIdx, unsigned DefAlign,
   2936                                     const MCInstrDesc &UseMCID,
   2937                                     unsigned UseIdx, unsigned UseAlign) const {
   2938   unsigned DefClass = DefMCID.getSchedClass();
   2939   unsigned UseClass = UseMCID.getSchedClass();
   2940 
   2941   if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
   2942     return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
   2943 
    2944   // This may be a def / use of a variable_ops instruction, in which case the
    2945   // operand latency may be determinable dynamically. Let the target try to
    2946   // figure it out.
   2947   int DefCycle = -1;
   2948   bool LdmBypass = false;
   2949   switch (DefMCID.getOpcode()) {
   2950   default:
   2951     DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
   2952     break;
   2953 
   2954   case ARM::VLDMDIA:
   2955   case ARM::VLDMDIA_UPD:
   2956   case ARM::VLDMDDB_UPD:
   2957   case ARM::VLDMSIA:
   2958   case ARM::VLDMSIA_UPD:
   2959   case ARM::VLDMSDB_UPD:
   2960     DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
   2961     break;
   2962 
   2963   case ARM::LDMIA_RET:
   2964   case ARM::LDMIA:
   2965   case ARM::LDMDA:
   2966   case ARM::LDMDB:
   2967   case ARM::LDMIB:
   2968   case ARM::LDMIA_UPD:
   2969   case ARM::LDMDA_UPD:
   2970   case ARM::LDMDB_UPD:
   2971   case ARM::LDMIB_UPD:
   2972   case ARM::tLDMIA:
   2973   case ARM::tLDMIA_UPD:
   2974   case ARM::tPUSH:
   2975   case ARM::t2LDMIA_RET:
   2976   case ARM::t2LDMIA:
   2977   case ARM::t2LDMDB:
   2978   case ARM::t2LDMIA_UPD:
   2979   case ARM::t2LDMDB_UPD:
    2980     LdmBypass = true;
   2981     DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
   2982     break;
   2983   }
   2984 
   2985   if (DefCycle == -1)
    2986     // We can't determine the result latency of the def; assume it's 2.
   2987     DefCycle = 2;
   2988 
   2989   int UseCycle = -1;
   2990   switch (UseMCID.getOpcode()) {
   2991   default:
   2992     UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
   2993     break;
   2994 
   2995   case ARM::VSTMDIA:
   2996   case ARM::VSTMDIA_UPD:
   2997   case ARM::VSTMDDB_UPD:
   2998   case ARM::VSTMSIA:
   2999   case ARM::VSTMSIA_UPD:
   3000   case ARM::VSTMSDB_UPD:
   3001     UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
   3002     break;
   3003 
   3004   case ARM::STMIA:
   3005   case ARM::STMDA:
   3006   case ARM::STMDB:
   3007   case ARM::STMIB:
   3008   case ARM::STMIA_UPD:
   3009   case ARM::STMDA_UPD:
   3010   case ARM::STMDB_UPD:
   3011   case ARM::STMIB_UPD:
   3012   case ARM::tSTMIA_UPD:
   3013   case ARM::tPOP_RET:
   3014   case ARM::tPOP:
   3015   case ARM::t2STMIA:
   3016   case ARM::t2STMDB:
   3017   case ARM::t2STMIA_UPD:
   3018   case ARM::t2STMDB_UPD:
   3019     UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
   3020     break;
   3021   }
   3022 
   3023   if (UseCycle == -1)
   3024     // Assume it's read in the first stage.
   3025     UseCycle = 1;
   3026 
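           // Convert the def / use stages into an operand latency: a result produced
           // in cycle DefCycle and read in stage UseCycle is visible after
           // DefCycle - UseCycle + 1 cycles, e.g. DefCycle == 4 read in the first
           // stage gives a latency of 4.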
   3027   UseCycle = DefCycle - UseCycle + 1;
   3028   if (UseCycle > 0) {
   3029     if (LdmBypass) {
    3030       // It's a variable_ops instruction, so we can't use DefIdx here. Just use
    3031       // the first def operand.
   3032       if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
   3033                                           UseClass, UseIdx))
   3034         --UseCycle;
   3035     } else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
   3036                                                UseClass, UseIdx)) {
   3037       --UseCycle;
   3038     }
   3039   }
   3040 
   3041   return UseCycle;
   3042 }
   3043 
   3044 static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,
   3045                                            const MachineInstr *MI, unsigned Reg,
   3046                                            unsigned &DefIdx, unsigned &Dist) {
   3047   Dist = 0;
   3048 
   3049   MachineBasicBlock::const_iterator I = MI; ++I;
   3050   MachineBasicBlock::const_instr_iterator II =
   3051     llvm::prior(I.getInstrIterator());
   3052   assert(II->isInsideBundle() && "Empty bundle?");
   3053 
   3054   int Idx = -1;
   3055   while (II->isInsideBundle()) {
   3056     Idx = II->findRegisterDefOperandIdx(Reg, false, true, TRI);
   3057     if (Idx != -1)
   3058       break;
   3059     --II;
   3060     ++Dist;
   3061   }
   3062 
   3063   assert(Idx != -1 && "Cannot find bundled definition!");
   3064   DefIdx = Idx;
   3065   return II;
   3066 }
   3067 
   3068 static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
   3069                                            const MachineInstr *MI, unsigned Reg,
   3070                                            unsigned &UseIdx, unsigned &Dist) {
   3071   Dist = 0;
   3072 
   3073   MachineBasicBlock::const_instr_iterator II = MI; ++II;
   3074   assert(II->isInsideBundle() && "Empty bundle?");
   3075   MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
   3076 
   3077   // FIXME: This doesn't properly handle multiple uses.
   3078   int Idx = -1;
   3079   while (II != E && II->isInsideBundle()) {
   3080     Idx = II->findRegisterUseOperandIdx(Reg, false, TRI);
   3081     if (Idx != -1)
   3082       break;
   3083     if (II->getOpcode() != ARM::t2IT)
   3084       ++Dist;
   3085     ++II;
   3086   }
   3087 
   3088   if (Idx == -1) {
   3089     Dist = 0;
   3090     return 0;
   3091   }
   3092 
   3093   UseIdx = Idx;
   3094   return II;
   3095 }
   3096 
   3097 /// Return the number of cycles to add to (or subtract from) the static
   3098 /// itinerary based on the def opcode and alignment. The caller will ensure that
   3099 /// adjusted latency is at least one cycle.
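         /// For example, with the opcode tables below, an LDRrs with no shift is
         /// adjusted by -1 on Cortex-A8/A9 and by -2 on Swift, while an under-aligned
         /// VLD1/VLD2-class load on an A9-like core is adjusted by +1.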
   3100 static int adjustDefLatency(const ARMSubtarget &Subtarget,
   3101                             const MachineInstr *DefMI,
   3102                             const MCInstrDesc *DefMCID, unsigned DefAlign) {
   3103   int Adjust = 0;
   3104   if (Subtarget.isCortexA8() || Subtarget.isLikeA9()) {
   3105     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
   3106     // variants are one cycle cheaper.
   3107     switch (DefMCID->getOpcode()) {
   3108     default: break;
   3109     case ARM::LDRrs:
   3110     case ARM::LDRBrs: {
   3111       unsigned ShOpVal = DefMI->getOperand(3).getImm();
   3112       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   3113       if (ShImm == 0 ||
   3114           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
   3115         --Adjust;
   3116       break;
   3117     }
   3118     case ARM::t2LDRs:
   3119     case ARM::t2LDRBs:
   3120     case ARM::t2LDRHs:
   3121     case ARM::t2LDRSHs: {
   3122       // Thumb2 mode: lsl only.
   3123       unsigned ShAmt = DefMI->getOperand(3).getImm();
   3124       if (ShAmt == 0 || ShAmt == 2)
   3125         --Adjust;
   3126       break;
   3127     }
   3128     }
   3129   } else if (Subtarget.isSwift()) {
   3130     // FIXME: Properly handle all of the latency adjustments for address
   3131     // writeback.
   3132     switch (DefMCID->getOpcode()) {
   3133     default: break;
   3134     case ARM::LDRrs:
   3135     case ARM::LDRBrs: {
   3136       unsigned ShOpVal = DefMI->getOperand(3).getImm();
   3137       bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
   3138       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   3139       if (!isSub &&
   3140           (ShImm == 0 ||
   3141            ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
   3142             ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
   3143         Adjust -= 2;
   3144       else if (!isSub &&
   3145                ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
   3146         --Adjust;
   3147       break;
   3148     }
   3149     case ARM::t2LDRs:
   3150     case ARM::t2LDRBs:
   3151     case ARM::t2LDRHs:
   3152     case ARM::t2LDRSHs: {
   3153       // Thumb2 mode: lsl only.
   3154       unsigned ShAmt = DefMI->getOperand(3).getImm();
   3155       if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)
   3156         Adjust -= 2;
   3157       break;
   3158     }
   3159     }
   3160   }
   3161 
   3162   if (DefAlign < 8 && Subtarget.isLikeA9()) {
   3163     switch (DefMCID->getOpcode()) {
   3164     default: break;
   3165     case ARM::VLD1q8:
   3166     case ARM::VLD1q16:
   3167     case ARM::VLD1q32:
   3168     case ARM::VLD1q64:
   3169     case ARM::VLD1q8wb_fixed:
   3170     case ARM::VLD1q16wb_fixed:
   3171     case ARM::VLD1q32wb_fixed:
   3172     case ARM::VLD1q64wb_fixed:
   3173     case ARM::VLD1q8wb_register:
   3174     case ARM::VLD1q16wb_register:
   3175     case ARM::VLD1q32wb_register:
   3176     case ARM::VLD1q64wb_register:
   3177     case ARM::VLD2d8:
   3178     case ARM::VLD2d16:
   3179     case ARM::VLD2d32:
   3180     case ARM::VLD2q8:
   3181     case ARM::VLD2q16:
   3182     case ARM::VLD2q32:
   3183     case ARM::VLD2d8wb_fixed:
   3184     case ARM::VLD2d16wb_fixed:
   3185     case ARM::VLD2d32wb_fixed:
   3186     case ARM::VLD2q8wb_fixed:
   3187     case ARM::VLD2q16wb_fixed:
   3188     case ARM::VLD2q32wb_fixed:
   3189     case ARM::VLD2d8wb_register:
   3190     case ARM::VLD2d16wb_register:
   3191     case ARM::VLD2d32wb_register:
   3192     case ARM::VLD2q8wb_register:
   3193     case ARM::VLD2q16wb_register:
   3194     case ARM::VLD2q32wb_register:
   3195     case ARM::VLD3d8:
   3196     case ARM::VLD3d16:
   3197     case ARM::VLD3d32:
   3198     case ARM::VLD1d64T:
   3199     case ARM::VLD3d8_UPD:
   3200     case ARM::VLD3d16_UPD:
   3201     case ARM::VLD3d32_UPD:
   3202     case ARM::VLD1d64Twb_fixed:
   3203     case ARM::VLD1d64Twb_register:
   3204     case ARM::VLD3q8_UPD:
   3205     case ARM::VLD3q16_UPD:
   3206     case ARM::VLD3q32_UPD:
   3207     case ARM::VLD4d8:
   3208     case ARM::VLD4d16:
   3209     case ARM::VLD4d32:
   3210     case ARM::VLD1d64Q:
   3211     case ARM::VLD4d8_UPD:
   3212     case ARM::VLD4d16_UPD:
   3213     case ARM::VLD4d32_UPD:
   3214     case ARM::VLD1d64Qwb_fixed:
   3215     case ARM::VLD1d64Qwb_register:
   3216     case ARM::VLD4q8_UPD:
   3217     case ARM::VLD4q16_UPD:
   3218     case ARM::VLD4q32_UPD:
   3219     case ARM::VLD1DUPq8:
   3220     case ARM::VLD1DUPq16:
   3221     case ARM::VLD1DUPq32:
   3222     case ARM::VLD1DUPq8wb_fixed:
   3223     case ARM::VLD1DUPq16wb_fixed:
   3224     case ARM::VLD1DUPq32wb_fixed:
   3225     case ARM::VLD1DUPq8wb_register:
   3226     case ARM::VLD1DUPq16wb_register:
   3227     case ARM::VLD1DUPq32wb_register:
   3228     case ARM::VLD2DUPd8:
   3229     case ARM::VLD2DUPd16:
   3230     case ARM::VLD2DUPd32:
   3231     case ARM::VLD2DUPd8wb_fixed:
   3232     case ARM::VLD2DUPd16wb_fixed:
   3233     case ARM::VLD2DUPd32wb_fixed:
   3234     case ARM::VLD2DUPd8wb_register:
   3235     case ARM::VLD2DUPd16wb_register:
   3236     case ARM::VLD2DUPd32wb_register:
   3237     case ARM::VLD4DUPd8:
   3238     case ARM::VLD4DUPd16:
   3239     case ARM::VLD4DUPd32:
   3240     case ARM::VLD4DUPd8_UPD:
   3241     case ARM::VLD4DUPd16_UPD:
   3242     case ARM::VLD4DUPd32_UPD:
   3243     case ARM::VLD1LNd8:
   3244     case ARM::VLD1LNd16:
   3245     case ARM::VLD1LNd32:
   3246     case ARM::VLD1LNd8_UPD:
   3247     case ARM::VLD1LNd16_UPD:
   3248     case ARM::VLD1LNd32_UPD:
   3249     case ARM::VLD2LNd8:
   3250     case ARM::VLD2LNd16:
   3251     case ARM::VLD2LNd32:
   3252     case ARM::VLD2LNq16:
   3253     case ARM::VLD2LNq32:
   3254     case ARM::VLD2LNd8_UPD:
   3255     case ARM::VLD2LNd16_UPD:
   3256     case ARM::VLD2LNd32_UPD:
   3257     case ARM::VLD2LNq16_UPD:
   3258     case ARM::VLD2LNq32_UPD:
   3259     case ARM::VLD4LNd8:
   3260     case ARM::VLD4LNd16:
   3261     case ARM::VLD4LNd32:
   3262     case ARM::VLD4LNq16:
   3263     case ARM::VLD4LNq32:
   3264     case ARM::VLD4LNd8_UPD:
   3265     case ARM::VLD4LNd16_UPD:
   3266     case ARM::VLD4LNd32_UPD:
   3267     case ARM::VLD4LNq16_UPD:
   3268     case ARM::VLD4LNq32_UPD:
    3269       // If the address is not 64-bit aligned, the latencies of these
    3270       // instructions increase by one.
   3271       ++Adjust;
   3272       break;
   3273     }
   3274   }
   3275   return Adjust;
   3276 }
   3277 
   3278 
   3279 
   3280 int
   3281 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   3282                                     const MachineInstr *DefMI, unsigned DefIdx,
   3283                                     const MachineInstr *UseMI,
   3284                                     unsigned UseIdx) const {
   3285   // No operand latency. The caller may fall back to getInstrLatency.
   3286   if (!ItinData || ItinData->isEmpty())
   3287     return -1;
   3288 
   3289   const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
   3290   unsigned Reg = DefMO.getReg();
   3291   const MCInstrDesc *DefMCID = &DefMI->getDesc();
   3292   const MCInstrDesc *UseMCID = &UseMI->getDesc();
   3293 
   3294   unsigned DefAdj = 0;
   3295   if (DefMI->isBundle()) {
   3296     DefMI = getBundledDefMI(&getRegisterInfo(), DefMI, Reg, DefIdx, DefAdj);
   3297     DefMCID = &DefMI->getDesc();
   3298   }
   3299   if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
   3300       DefMI->isRegSequence() || DefMI->isImplicitDef()) {
   3301     return 1;
   3302   }
   3303 
   3304   unsigned UseAdj = 0;
   3305   if (UseMI->isBundle()) {
   3306     unsigned NewUseIdx;
   3307     const MachineInstr *NewUseMI = getBundledUseMI(&getRegisterInfo(), UseMI,
   3308                                                    Reg, NewUseIdx, UseAdj);
   3309     if (!NewUseMI)
   3310       return -1;
   3311 
   3312     UseMI = NewUseMI;
   3313     UseIdx = NewUseIdx;
   3314     UseMCID = &UseMI->getDesc();
   3315   }
   3316 
   3317   if (Reg == ARM::CPSR) {
   3318     if (DefMI->getOpcode() == ARM::FMSTAT) {
   3319       // fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)
   3320       return Subtarget.isLikeA9() ? 1 : 20;
   3321     }
   3322 
   3323     // CPSR set and branch can be paired in the same cycle.
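             // (For example, a flag-setting SUBS immediately followed by a conditional
             // branch reports an operand latency of 0.)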
   3324     if (UseMI->isBranch())
   3325       return 0;
   3326 
   3327     // Otherwise it takes the instruction latency (generally one).
   3328     unsigned Latency = getInstrLatency(ItinData, DefMI);
   3329 
    3330     // For Thumb2 and -Os, prefer scheduling a CPSR-setting instruction close to
    3331     // its uses. Instructions scheduled between them may incur a code size
    3332     // penalty (they cannot use the CPSR-setting 16-bit instructions).
   3334     if (Latency > 0 && Subtarget.isThumb2()) {
   3335       const MachineFunction *MF = DefMI->getParent()->getParent();
   3336       if (MF->getFunction()->getAttributes().
   3337             hasAttribute(AttributeSet::FunctionIndex,
   3338                          Attribute::OptimizeForSize))
   3339         --Latency;
   3340     }
   3341     return Latency;
   3342   }
   3343 
   3344   if (DefMO.isImplicit() || UseMI->getOperand(UseIdx).isImplicit())
   3345     return -1;
   3346 
   3347   unsigned DefAlign = DefMI->hasOneMemOperand()
   3348     ? (*DefMI->memoperands_begin())->getAlignment() : 0;
   3349   unsigned UseAlign = UseMI->hasOneMemOperand()
   3350     ? (*UseMI->memoperands_begin())->getAlignment() : 0;
   3351 
   3352   // Get the itinerary's latency if possible, and handle variable_ops.
   3353   int Latency = getOperandLatency(ItinData, *DefMCID, DefIdx, DefAlign,
   3354                                   *UseMCID, UseIdx, UseAlign);
   3355   // Unable to find operand latency. The caller may resort to getInstrLatency.
   3356   if (Latency < 0)
   3357     return Latency;
   3358 
   3359   // Adjust for IT block position.
   3360   int Adj = DefAdj + UseAdj;
   3361 
   3362   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
   3363   Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);
   3364   if (Adj >= 0 || (int)Latency > -Adj) {
   3365     return Latency + Adj;
   3366   }
   3367   // Return the itinerary latency, which may be zero but not less than zero.
   3368   return Latency;
   3369 }
   3370 
   3371 int
   3372 ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
   3373                                     SDNode *DefNode, unsigned DefIdx,
   3374                                     SDNode *UseNode, unsigned UseIdx) const {
   3375   if (!DefNode->isMachineOpcode())
   3376     return 1;
   3377 
   3378   const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
   3379 
   3380   if (isZeroCost(DefMCID.Opcode))
   3381     return 0;
   3382 
   3383   if (!ItinData || ItinData->isEmpty())
   3384     return DefMCID.mayLoad() ? 3 : 1;
   3385 
   3386   if (!UseNode->isMachineOpcode()) {
   3387     int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
   3388     if (Subtarget.isLikeA9() || Subtarget.isSwift())
   3389       return Latency <= 2 ? 1 : Latency - 1;
   3390     else
   3391       return Latency <= 3 ? 1 : Latency - 2;
   3392   }
   3393 
   3394   const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
    3395   const MachineSDNode *DefMN = cast<MachineSDNode>(DefNode);
   3396   unsigned DefAlign = !DefMN->memoperands_empty()
   3397     ? (*DefMN->memoperands_begin())->getAlignment() : 0;
    3398   const MachineSDNode *UseMN = cast<MachineSDNode>(UseNode);
   3399   unsigned UseAlign = !UseMN->memoperands_empty()
   3400     ? (*UseMN->memoperands_begin())->getAlignment() : 0;
   3401   int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
   3402                                   UseMCID, UseIdx, UseAlign);
   3403 
   3404   if (Latency > 1 &&
   3405       (Subtarget.isCortexA8() || Subtarget.isLikeA9())) {
   3406     // FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
   3407     // variants are one cycle cheaper.
   3408     switch (DefMCID.getOpcode()) {
   3409     default: break;
   3410     case ARM::LDRrs:
   3411     case ARM::LDRBrs: {
   3412       unsigned ShOpVal =
   3413         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
   3414       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   3415       if (ShImm == 0 ||
   3416           (ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
   3417         --Latency;
   3418       break;
   3419     }
   3420     case ARM::t2LDRs:
   3421     case ARM::t2LDRBs:
   3422     case ARM::t2LDRHs:
   3423     case ARM::t2LDRSHs: {
   3424       // Thumb2 mode: lsl only.
   3425       unsigned ShAmt =
   3426         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
   3427       if (ShAmt == 0 || ShAmt == 2)
   3428         --Latency;
   3429       break;
   3430     }
   3431     }
   3432   } else if (DefIdx == 0 && Latency > 2 && Subtarget.isSwift()) {
   3433     // FIXME: Properly handle all of the latency adjustments for address
   3434     // writeback.
   3435     switch (DefMCID.getOpcode()) {
   3436     default: break;
   3437     case ARM::LDRrs:
   3438     case ARM::LDRBrs: {
   3439       unsigned ShOpVal =
   3440         cast<ConstantSDNode>(DefNode->getOperand(2))->getZExtValue();
   3441       unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
   3442       if (ShImm == 0 ||
   3443           ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
   3444            ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))
   3445         Latency -= 2;
   3446       else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)
   3447         --Latency;
   3448       break;
   3449     }
   3450     case ARM::t2LDRs:
   3451     case ARM::t2LDRBs:
   3452     case ARM::t2LDRHs:
   3453     case ARM::t2LDRSHs: {
   3454       // Thumb2 mode: lsl 0-3 only.
   3455       Latency -= 2;
   3456       break;
   3457     }
   3458     }
   3459   }
   3460 
   3461   if (DefAlign < 8 && Subtarget.isLikeA9())
   3462     switch (DefMCID.getOpcode()) {
   3463     default: break;
   3464     case ARM::VLD1q8:
   3465     case ARM::VLD1q16:
   3466     case ARM::VLD1q32:
   3467     case ARM::VLD1q64:
   3468     case ARM::VLD1q8wb_register:
   3469     case ARM::VLD1q16wb_register:
   3470     case ARM::VLD1q32wb_register:
   3471     case ARM::VLD1q64wb_register:
   3472     case ARM::VLD1q8wb_fixed:
   3473     case ARM::VLD1q16wb_fixed:
   3474     case ARM::VLD1q32wb_fixed:
   3475     case ARM::VLD1q64wb_fixed:
   3476     case ARM::VLD2d8:
   3477     case ARM::VLD2d16:
   3478     case ARM::VLD2d32:
   3479     case ARM::VLD2q8Pseudo:
   3480     case ARM::VLD2q16Pseudo:
   3481     case ARM::VLD2q32Pseudo:
   3482     case ARM::VLD2d8wb_fixed:
   3483     case ARM::VLD2d16wb_fixed:
   3484     case ARM::VLD2d32wb_fixed:
   3485     case ARM::VLD2q8PseudoWB_fixed:
   3486     case ARM::VLD2q16PseudoWB_fixed:
   3487     case ARM::VLD2q32PseudoWB_fixed:
   3488     case ARM::VLD2d8wb_register:
   3489     case ARM::VLD2d16wb_register:
   3490     case ARM::VLD2d32wb_register:
   3491     case ARM::VLD2q8PseudoWB_register:
   3492     case ARM::VLD2q16PseudoWB_register:
   3493     case ARM::VLD2q32PseudoWB_register:
   3494     case ARM::VLD3d8Pseudo:
   3495     case ARM::VLD3d16Pseudo:
   3496     case ARM::VLD3d32Pseudo:
   3497     case ARM::VLD1d64TPseudo:
   3498     case ARM::VLD3d8Pseudo_UPD:
   3499     case ARM::VLD3d16Pseudo_UPD:
   3500     case ARM::VLD3d32Pseudo_UPD:
   3501     case ARM::VLD3q8Pseudo_UPD:
   3502     case ARM::VLD3q16Pseudo_UPD:
   3503     case ARM::VLD3q32Pseudo_UPD:
   3504     case ARM::VLD3q8oddPseudo:
   3505     case ARM::VLD3q16oddPseudo:
   3506     case ARM::VLD3q32oddPseudo:
   3507     case ARM::VLD3q8oddPseudo_UPD:
   3508     case ARM::VLD3q16oddPseudo_UPD:
   3509     case ARM::VLD3q32oddPseudo_UPD:
   3510     case ARM::VLD4d8Pseudo:
   3511     case ARM::VLD4d16Pseudo:
   3512     case ARM::VLD4d32Pseudo:
   3513     case ARM::VLD1d64QPseudo:
   3514     case ARM::VLD4d8Pseudo_UPD:
   3515     case ARM::VLD4d16Pseudo_UPD:
   3516     case ARM::VLD4d32Pseudo_UPD:
   3517     case ARM::VLD4q8Pseudo_UPD:
   3518     case ARM::VLD4q16Pseudo_UPD:
   3519     case ARM::VLD4q32Pseudo_UPD:
   3520     case ARM::VLD4q8oddPseudo:
   3521     case ARM::VLD4q16oddPseudo:
   3522     case ARM::VLD4q32oddPseudo:
   3523     case ARM::VLD4q8oddPseudo_UPD:
   3524     case ARM::VLD4q16oddPseudo_UPD:
   3525     case ARM::VLD4q32oddPseudo_UPD:
   3526     case ARM::VLD1DUPq8:
   3527     case ARM::VLD1DUPq16:
   3528     case ARM::VLD1DUPq32:
   3529     case ARM::VLD1DUPq8wb_fixed:
   3530     case ARM::VLD1DUPq16wb_fixed:
   3531     case ARM::VLD1DUPq32wb_fixed:
   3532     case ARM::VLD1DUPq8wb_register:
   3533     case ARM::VLD1DUPq16wb_register:
   3534     case ARM::VLD1DUPq32wb_register:
   3535     case ARM::VLD2DUPd8:
   3536     case ARM::VLD2DUPd16:
   3537     case ARM::VLD2DUPd32:
   3538     case ARM::VLD2DUPd8wb_fixed:
   3539     case ARM::VLD2DUPd16wb_fixed:
   3540     case ARM::VLD2DUPd32wb_fixed:
   3541     case ARM::VLD2DUPd8wb_register:
   3542     case ARM::VLD2DUPd16wb_register:
   3543     case ARM::VLD2DUPd32wb_register:
   3544     case ARM::VLD4DUPd8Pseudo:
   3545     case ARM::VLD4DUPd16Pseudo:
   3546     case ARM::VLD4DUPd32Pseudo:
   3547     case ARM::VLD4DUPd8Pseudo_UPD:
   3548     case ARM::VLD4DUPd16Pseudo_UPD:
   3549     case ARM::VLD4DUPd32Pseudo_UPD:
   3550     case ARM::VLD1LNq8Pseudo:
   3551     case ARM::VLD1LNq16Pseudo:
   3552     case ARM::VLD1LNq32Pseudo:
   3553     case ARM::VLD1LNq8Pseudo_UPD:
   3554     case ARM::VLD1LNq16Pseudo_UPD:
   3555     case ARM::VLD1LNq32Pseudo_UPD:
   3556     case ARM::VLD2LNd8Pseudo:
   3557     case ARM::VLD2LNd16Pseudo:
   3558     case ARM::VLD2LNd32Pseudo:
   3559     case ARM::VLD2LNq16Pseudo:
   3560     case ARM::VLD2LNq32Pseudo:
   3561     case ARM::VLD2LNd8Pseudo_UPD:
   3562     case ARM::VLD2LNd16Pseudo_UPD:
   3563     case ARM::VLD2LNd32Pseudo_UPD:
   3564     case ARM::VLD2LNq16Pseudo_UPD:
   3565     case ARM::VLD2LNq32Pseudo_UPD:
   3566     case ARM::VLD4LNd8Pseudo:
   3567     case ARM::VLD4LNd16Pseudo:
   3568     case ARM::VLD4LNd32Pseudo:
   3569     case ARM::VLD4LNq16Pseudo:
   3570     case ARM::VLD4LNq32Pseudo:
   3571     case ARM::VLD4LNd8Pseudo_UPD:
   3572     case ARM::VLD4LNd16Pseudo_UPD:
   3573     case ARM::VLD4LNd32Pseudo_UPD:
   3574     case ARM::VLD4LNq16Pseudo_UPD:
   3575     case ARM::VLD4LNq32Pseudo_UPD:
    3576       // If the address is not 64-bit aligned, the latencies of these
    3577       // instructions increase by one.
   3578       ++Latency;
   3579       break;
   3580     }
   3581 
   3582   return Latency;
   3583 }
   3584 
   3585 unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   3586                                            const MachineInstr *MI,
   3587                                            unsigned *PredCost) const {
   3588   if (MI->isCopyLike() || MI->isInsertSubreg() ||
   3589       MI->isRegSequence() || MI->isImplicitDef())
   3590     return 1;
   3591 
    3592   // An instruction scheduler typically runs on unbundled instructions; however,
    3593   // other passes may query the latency of a bundled instruction.
   3594   if (MI->isBundle()) {
   3595     unsigned Latency = 0;
   3596     MachineBasicBlock::const_instr_iterator I = MI;
   3597     MachineBasicBlock::const_instr_iterator E = MI->getParent()->instr_end();
   3598     while (++I != E && I->isInsideBundle()) {
   3599       if (I->getOpcode() != ARM::t2IT)
   3600         Latency += getInstrLatency(ItinData, I, PredCost);
   3601     }
   3602     return Latency;
   3603   }
   3604 
   3605   const MCInstrDesc &MCID = MI->getDesc();
   3606   if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
    3607     // When predicated, CPSR is an additional source operand for CPSR-updating
    3608     // instructions, which apparently increases their latencies.
   3609     *PredCost = 1;
   3610   }
   3611   // Be sure to call getStageLatency for an empty itinerary in case it has a
   3612   // valid MinLatency property.
   3613   if (!ItinData)
   3614     return MI->mayLoad() ? 3 : 1;
   3615 
   3616   unsigned Class = MCID.getSchedClass();
   3617 
   3618   // For instructions with variable uops, use uops as latency.
   3619   if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
   3620     return getNumMicroOps(ItinData, MI);
   3621 
   3622   // For the common case, fall back on the itinerary's latency.
   3623   unsigned Latency = ItinData->getStageLatency(Class);
   3624 
   3625   // Adjust for dynamic def-side opcode variants not captured by the itinerary.
   3626   unsigned DefAlign = MI->hasOneMemOperand()
   3627     ? (*MI->memoperands_begin())->getAlignment() : 0;
   3628   int Adj = adjustDefLatency(Subtarget, MI, &MCID, DefAlign);
   3629   if (Adj >= 0 || (int)Latency > -Adj) {
   3630     return Latency + Adj;
   3631   }
   3632   return Latency;
   3633 }
   3634 
   3635 int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
   3636                                       SDNode *Node) const {
   3637   if (!Node->isMachineOpcode())
   3638     return 1;
   3639 
   3640   if (!ItinData || ItinData->isEmpty())
   3641     return 1;
   3642 
   3643   unsigned Opcode = Node->getMachineOpcode();
   3644   switch (Opcode) {
   3645   default:
   3646     return ItinData->getStageLatency(get(Opcode).getSchedClass());
   3647   case ARM::VLDMQIA:
   3648   case ARM::VSTMQIA:
   3649     return 2;
   3650   }
   3651 }
   3652 
   3653 bool ARMBaseInstrInfo::
   3654 hasHighOperandLatency(const InstrItineraryData *ItinData,
   3655                       const MachineRegisterInfo *MRI,
   3656                       const MachineInstr *DefMI, unsigned DefIdx,
   3657                       const MachineInstr *UseMI, unsigned UseIdx) const {
   3658   unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
   3659   unsigned UDomain = UseMI->getDesc().TSFlags & ARMII::DomainMask;
   3660   if (Subtarget.isCortexA8() &&
   3661       (DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))
   3662     // CortexA8 VFP instructions are not pipelined.
   3663     return true;
   3664 
   3665   // Hoist VFP / NEON instructions with 4 or higher latency.
   3666   int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx);
   3667   if (Latency < 0)
   3668     Latency = getInstrLatency(ItinData, DefMI);
   3669   if (Latency <= 3)
   3670     return false;
   3671   return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||
   3672          UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;
   3673 }
   3674 
   3675 bool ARMBaseInstrInfo::
   3676 hasLowDefLatency(const InstrItineraryData *ItinData,
   3677                  const MachineInstr *DefMI, unsigned DefIdx) const {
   3678   if (!ItinData || ItinData->isEmpty())
   3679     return false;
   3680 
   3681   unsigned DDomain = DefMI->getDesc().TSFlags & ARMII::DomainMask;
   3682   if (DDomain == ARMII::DomainGeneral) {
   3683     unsigned DefClass = DefMI->getDesc().getSchedClass();
   3684     int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
   3685     return (DefCycle != -1 && DefCycle <= 2);
   3686   }
   3687   return false;
   3688 }
   3689 
   3690 bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr *MI,
   3691                                          StringRef &ErrInfo) const {
   3692   if (convertAddSubFlagsOpcode(MI->getOpcode())) {
   3693     ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";
   3694     return false;
   3695   }
   3696   return true;
   3697 }
   3698 
   3699 bool
   3700 ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,
   3701                                      unsigned &AddSubOpc,
   3702                                      bool &NegAcc, bool &HasLane) const {
   3703   DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);
   3704   if (I == MLxEntryMap.end())
   3705     return false;
   3706 
   3707   const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];
   3708   MulOpc = Entry.MulOpc;
   3709   AddSubOpc = Entry.AddSubOpc;
   3710   NegAcc = Entry.NegAcc;
   3711   HasLane = Entry.HasLane;
   3712   return true;
   3713 }
   3714 
   3715 //===----------------------------------------------------------------------===//
   3716 // Execution domains.
   3717 //===----------------------------------------------------------------------===//
   3718 //
   3719 // Some instructions go down the NEON pipeline, some go down the VFP pipeline,
   3720 // and some can go down both.  The vmov instructions go down the VFP pipeline,
   3721 // but they can be changed to vorr equivalents that are executed by the NEON
   3722 // pipeline.
   3723 //
   3724 // We use the following execution domain numbering:
   3725 //
   3726 enum ARMExeDomain {
   3727   ExeGeneric = 0,
   3728   ExeVFP = 1,
   3729   ExeNEON = 2
   3730 };
   3731 //
   3732 // Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h
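         //
         // getExecutionDomain returns a (current domain, possible-domains bitmask)
         // pair; e.g. an unpredicated VMOVD reports
         // (ExeVFP, (1<<ExeVFP)|(1<<ExeNEON)), and setExecutionDomain may then
         // rewrite it to a VORRd that runs in the NEON pipeline.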
   3733 //
   3734 std::pair<uint16_t, uint16_t>
   3735 ARMBaseInstrInfo::getExecutionDomain(const MachineInstr *MI) const {
   3736   // VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON
   3737   // if they are not predicated.
   3738   if (MI->getOpcode() == ARM::VMOVD && !isPredicated(MI))
   3739     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
   3740 
   3741   // CortexA9 is particularly picky about mixing the two and wants these
   3742   // converted.
   3743   if (Subtarget.isCortexA9() && !isPredicated(MI) &&
   3744       (MI->getOpcode() == ARM::VMOVRS ||
   3745        MI->getOpcode() == ARM::VMOVSR ||
   3746        MI->getOpcode() == ARM::VMOVS))
   3747     return std::make_pair(ExeVFP, (1<<ExeVFP) | (1<<ExeNEON));
   3748 
   3749   // No other instructions can be swizzled, so just determine their domain.
   3750   unsigned Domain = MI->getDesc().TSFlags & ARMII::DomainMask;
   3751 
   3752   if (Domain & ARMII::DomainNEON)
   3753     return std::make_pair(ExeNEON, 0);
   3754 
   3755   // Certain instructions can go either way on Cortex-A8.
   3756   // Treat them as NEON instructions.
   3757   if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())
   3758     return std::make_pair(ExeNEON, 0);
   3759 
   3760   if (Domain & ARMII::DomainVFP)
   3761     return std::make_pair(ExeVFP, 0);
   3762 
   3763   return std::make_pair(ExeGeneric, 0);
   3764 }
   3765 
   3766 static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,
   3767                                             unsigned SReg, unsigned &Lane) {
   3768   unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);
   3769   Lane = 0;
   3770 
   3771   if (DReg != ARM::NoRegister)
    3772     return DReg;
   3773 
   3774   Lane = 1;
   3775   DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);
   3776 
   3777   assert(DReg && "S-register with no D super-register?");
   3778   return DReg;
   3779 }
   3780 
    3781 /// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,
    3782 /// set ImplicitSReg to the register number that must be marked as implicit-use,
    3783 /// or zero if no register needs to be marked as implicit-use.
   3784 ///
   3785 /// If the function cannot determine if an SPR should be marked implicit use or
   3786 /// not, it returns false.
   3787 ///
    3788 /// This function handles cases where an instruction is being modified from
    3789 /// taking an SPR to taking a DPR[Lane]. A use of the DPR is being added, which
    3790 /// may conflict with an earlier def of an SPR corresponding to DPR[Lane^1]
    3791 /// (i.e. the other lane of the DPR).
   3792 ///
    3793 /// If the other SPR is defined, an implicit-use of it should be added.
    3794 /// Otherwise (including the case where the DPR itself is defined), it should not.
   3795 ///
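         /// For example, if a write to %s1 is rewritten as a write to lane 1 of d0
         /// while %s0 (lane 0 of d0) has a live def, an implicit-use of %s0 keeps
         /// that def chained correctly.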
   3796 static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,
   3797                                        MachineInstr *MI,
   3798                                        unsigned DReg, unsigned Lane,
   3799                                        unsigned &ImplicitSReg) {
   3800   // If the DPR is defined or used already, the other SPR lane will be chained
   3801   // correctly, so there is nothing to be done.
   3802   if (MI->definesRegister(DReg, TRI) || MI->readsRegister(DReg, TRI)) {
   3803     ImplicitSReg = 0;
   3804     return true;
   3805   }
   3806 
   3807   // Otherwise we need to go searching to see if the SPR is set explicitly.
   3808   ImplicitSReg = TRI->getSubReg(DReg,
   3809                                 (Lane & 1) ? ARM::ssub_0 : ARM::ssub_1);
   3810   MachineBasicBlock::LivenessQueryResult LQR =
   3811     MI->getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);
   3812 
   3813   if (LQR == MachineBasicBlock::LQR_Live)
   3814     return true;
   3815   else if (LQR == MachineBasicBlock::LQR_Unknown)
   3816     return false;
   3817 
   3818   // If the register is known not to be live, there is no need to add an
   3819   // implicit-use.
   3820   ImplicitSReg = 0;
   3821   return true;
   3822 }
   3823 
   3824 void
   3825 ARMBaseInstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const {
   3826   unsigned DstReg, SrcReg, DReg;
   3827   unsigned Lane;
   3828   MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI);
   3829   const TargetRegisterInfo *TRI = &getRegisterInfo();
   3830   switch (MI->getOpcode()) {
    3831     default:
    3832       llvm_unreachable("cannot handle opcode!");
   3834     case ARM::VMOVD:
   3835       if (Domain != ExeNEON)
   3836         break;
   3837 
   3838       // Zap the predicate operands.
   3839       assert(!isPredicated(MI) && "Cannot predicate a VORRd");
   3840 
   3841       // Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)
   3842       DstReg = MI->getOperand(0).getReg();
   3843       SrcReg = MI->getOperand(1).getReg();
   3844 
   3845       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
   3846         MI->RemoveOperand(i-1);
   3847 
   3848       // Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)
   3849       MI->setDesc(get(ARM::VORRd));
   3850       AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
   3851                         .addReg(SrcReg)
   3852                         .addReg(SrcReg));
   3853       break;
   3854     case ARM::VMOVRS:
   3855       if (Domain != ExeNEON)
   3856         break;
   3857       assert(!isPredicated(MI) && "Cannot predicate a VGETLN");
   3858 
   3859       // Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)
   3860       DstReg = MI->getOperand(0).getReg();
   3861       SrcReg = MI->getOperand(1).getReg();
   3862 
   3863       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
   3864         MI->RemoveOperand(i-1);
   3865 
   3866       DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);
   3867 
   3868       // Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)
   3869       // Note that DSrc has been widened and the other lane may be undef, which
   3870       // contaminates the entire register.
   3871       MI->setDesc(get(ARM::VGETLNi32));
   3872       AddDefaultPred(MIB.addReg(DstReg, RegState::Define)
   3873                         .addReg(DReg, RegState::Undef)
   3874                         .addImm(Lane));
   3875 
   3876       // The old source should be an implicit use, otherwise we might think it
   3877       // was dead before here.
   3878       MIB.addReg(SrcReg, RegState::Implicit);
   3879       break;
   3880     case ARM::VMOVSR: {
   3881       if (Domain != ExeNEON)
   3882         break;
   3883       assert(!isPredicated(MI) && "Cannot predicate a VSETLN");
   3884 
   3885       // Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)
   3886       DstReg = MI->getOperand(0).getReg();
   3887       SrcReg = MI->getOperand(1).getReg();
   3888 
   3889       DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);
   3890 
   3891       unsigned ImplicitSReg;
   3892       if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))
   3893         break;
   3894 
   3895       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
   3896         MI->RemoveOperand(i-1);
   3897 
   3898       // Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)
   3899       // Again DDst may be undefined at the beginning of this instruction.
   3900       MI->setDesc(get(ARM::VSETLNi32));
   3901       MIB.addReg(DReg, RegState::Define)
   3902          .addReg(DReg, getUndefRegState(!MI->readsRegister(DReg, TRI)))
   3903          .addReg(SrcReg)
   3904          .addImm(Lane);
   3905       AddDefaultPred(MIB);
   3906 
   3907       // The narrower destination must be marked as set to keep previous chains
   3908       // in place.
   3909       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
   3910       if (ImplicitSReg != 0)
   3911         MIB.addReg(ImplicitSReg, RegState::Implicit);
   3912       break;
   3913     }
   3914     case ARM::VMOVS: {
   3915       if (Domain != ExeNEON)
   3916         break;
   3917 
   3918       // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
   3919       DstReg = MI->getOperand(0).getReg();
   3920       SrcReg = MI->getOperand(1).getReg();
   3921 
   3922       unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
   3923       DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
   3924       DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);
   3925 
   3926       unsigned ImplicitSReg;
   3927       if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
   3928         break;
   3929 
   3930       for (unsigned i = MI->getDesc().getNumOperands(); i; --i)
   3931         MI->RemoveOperand(i-1);
   3932 
   3933       if (DSrc == DDst) {
   3934         // Destination can be:
   3935         //     %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
   3936         MI->setDesc(get(ARM::VDUPLN32d));
   3937         MIB.addReg(DDst, RegState::Define)
   3938            .addReg(DDst, getUndefRegState(!MI->readsRegister(DDst, TRI)))
   3939            .addImm(SrcLane);
   3940         AddDefaultPred(MIB);
   3941 
    3942         // Neither the source nor the destination is naturally represented any
    3943         // more, so add them in manually.
   3944         MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
   3945         MIB.addReg(SrcReg, RegState::Implicit);
   3946         if (ImplicitSReg != 0)
   3947           MIB.addReg(ImplicitSReg, RegState::Implicit);
   3948         break;
   3949       }
   3950 
   3951       // In general there's no single instruction that can perform an S <-> S
   3952       // move in NEON space, but a pair of VEXT instructions *can* do the
   3953       // job. It turns out that the VEXTs needed will only use DSrc once, with
   3954       // the position based purely on the combination of lane-0 and lane-1
   3955       // involved. For example
   3956       //     vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
   3957       //     vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
   3958       //     vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
   3959       //     vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
   3960       //
   3961       // Pattern of the MachineInstrs is:
   3962       //     %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (;implicits)
   3963       MachineInstrBuilder NewMIB;
   3964       NewMIB = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
   3965                        get(ARM::VEXTd32), DDst);
   3966 
    3967       // On the first instruction, both DSrc and DDst may be <undef> if present,
    3968       // specifically when the original instruction didn't have them as an
    3969       // <imp-use>.
   3970       unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
   3971       bool CurUndef = !MI->readsRegister(CurReg, TRI);
   3972       NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
   3973 
   3974       CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
   3975       CurUndef = !MI->readsRegister(CurReg, TRI);
   3976       NewMIB.addReg(CurReg, getUndefRegState(CurUndef));
   3977 
   3978       NewMIB.addImm(1);
   3979       AddDefaultPred(NewMIB);
   3980 
   3981       if (SrcLane == DstLane)
   3982         NewMIB.addReg(SrcReg, RegState::Implicit);
   3983 
   3984       MI->setDesc(get(ARM::VEXTd32));
   3985       MIB.addReg(DDst, RegState::Define);
   3986 
   3987       // On the second instruction, DDst has definitely been defined above, so
   3988       // it is not <undef>. DSrc, if present, can be <undef> as above.
   3989       CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
   3990       CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
   3991       MIB.addReg(CurReg, getUndefRegState(CurUndef));
   3992 
   3993       CurReg = SrcLane == 0 && DstLane == 1 ? DSrc : DDst;
   3994       CurUndef = CurReg == DSrc && !MI->readsRegister(CurReg, TRI);
   3995       MIB.addReg(CurReg, getUndefRegState(CurUndef));
   3996 
   3997       MIB.addImm(1);
   3998       AddDefaultPred(MIB);
   3999 
   4000       if (SrcLane != DstLane)
   4001         MIB.addReg(SrcReg, RegState::Implicit);
   4002 
   4003       // As before, the original destination is no longer represented, add it
   4004       // implicitly.
   4005       MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
   4006       if (ImplicitSReg != 0)
   4007         MIB.addReg(ImplicitSReg, RegState::Implicit);
   4008       break;
   4009     }
   4010   }
   4012 }
   4013 
   4014 //===----------------------------------------------------------------------===//
   4015 // Partial register updates
   4016 //===----------------------------------------------------------------------===//
   4017 //
   4018 // Swift renames NEON registers with 64-bit granularity.  That means any
   4019 // instruction writing an S-reg implicitly reads the containing D-reg.  The
   4020 // problem is mostly avoided by translating f32 operations to v2f32 operations
   4021 // on D-registers, but f32 loads are still a problem.
   4022 //
   4023 // These instructions can load an f32 into a NEON register:
   4024 //
   4025 // VLDRS - Only writes S, partial D update.
   4026 // VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
   4027 // VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
   4028 //
   4029 // FCONSTD can be used as a dependency-breaking instruction.
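//
// For illustration (register numbers arbitrary), the unwanted dependency
// looks like:
//
//   vldr s0, [r0]   ; writes only s0, so it must merge with, and therefore
//                   ; wait for, whatever last wrote d0
//
// Emitting FCONSTD ("vmov.f64 d0, #imm") right before the load gives d0 a
// fresh full definition, so the VLDRS no longer waits on the stale value.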
unsigned ARMBaseInstrInfo::
getPartialRegUpdateClearance(const MachineInstr *MI,
                             unsigned OpNum,
                             const TargetRegisterInfo *TRI) const {
  if (!SwiftPartialUpdateClearance ||
      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
    return 0;

  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI->getOperand(OpNum);
  if (MO.readsReg())
    return 0;
  unsigned Reg = MO.getReg();
  int UseOp = -1;

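  // Find the operand (if any) through which MI already reads Reg; UseOp stays
  // -1 when there is no such operand.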
  switch (MI->getOpcode()) {
    // Normal instructions writing only an S-register.
  case ARM::VLDRS:
  case ARM::FCONSTS:
  case ARM::VMOVSR:
  case ARM::VMOVv8i8:
  case ARM::VMOVv4i16:
  case ARM::VMOVv2i32:
  case ARM::VMOVv2f32:
  case ARM::VMOVv1i64:
    UseOp = MI->findRegisterUseOperandIdx(Reg, false, TRI);
    break;

    // Explicitly reads the dependency.
  case ARM::VLD1LNd32:
    UseOp = 3;
    break;
  default:
    return 0;
  }

  // If this instruction actually reads a value from Reg, there is no unwanted
  // dependency.
  if (UseOp != -1 && MI->getOperand(UseOp).readsReg())
    return 0;

  // We must be able to clobber the whole D-reg.
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    // Virtual register must be a foo:ssub_0<def,undef> operand.
    if (!MO.getSubReg() || MI->readsVirtualRegister(Reg))
      return 0;
  } else if (ARM::SPRRegClass.contains(Reg)) {
    // Physical register: MI must define the full D-reg.
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
                                             &ARM::DPRRegClass);
    if (!DReg || !MI->definesRegister(DReg, TRI))
      return 0;
  }

  // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
  return SwiftPartialUpdateClearance;
}

// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
void ARMBaseInstrInfo::
breakPartialRegDependency(MachineBasicBlock::iterator MI,
                          unsigned OpNum,
                          const TargetRegisterInfo *TRI) const {
  assert(MI && OpNum < MI->getDesc().getNumDefs() && "OpNum is not a def");
  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI->getOperand(OpNum);
  unsigned Reg = MO.getReg();
  assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
         "Can't break virtual register dependencies.");
  unsigned DReg = Reg;

  // If MI defines an S-reg, find the corresponding D super-register.
  if (ARM::SPRRegClass.contains(Reg)) {
    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
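    // Adjacent S-registers share one D-register: S0/S1 live in D0, S2/S3 in
    // D1, and so on, hence the divide by 2.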
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
  }

  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
  assert(MI->definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");

  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32, which defines
  // the full D-register by loading the same value to both lanes.  That
  // instruction is micro-coded with 2 uops, so don't do this until we can
  // properly schedule micro-coded instructions.  The dispatcher stalls cause
  // regressions that are too big.

  // Insert the dependency-breaking FCONSTD before MI.
  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
  AddDefaultPred(BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                         get(ARM::FCONSTD), DReg).addImm(96));
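  // Record an implicit read (and kill) of the full D-reg on MI so that MI now
  // depends on the FCONSTD above instead of on any older writer of DReg.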
  MI->addRegisterKilled(DReg, TRI, true);
}

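// Whether the target can encode a true NOP, rather than faking one with a
// neutral instruction such as "mov r0, r0"; the check keys off v6T2.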
bool ARMBaseInstrInfo::hasNOP() const {
  return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0;
}

bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
  if (MI->getNumOperands() < 4)
    return true;
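  // Operand 3 holds the encoded shifter operand; decode the shift amount and
  // shift kind from it.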
  unsigned ShOpVal = MI->getOperand(3).getImm();
  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
      ((ShImm == 1 || ShImm == 2) &&
       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
    return true;

  return false;
}