Home | History | Annotate | Download | only in ARM
      1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the ARM-specific support for the FastISel class. Some
     11 // of the target-specific code is generated by tablegen in the file
     12 // ARMGenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "ARM.h"
     17 #include "ARMBaseRegisterInfo.h"
     18 #include "ARMCallingConv.h"
     19 #include "ARMConstantPoolValue.h"
     20 #include "ARMISelLowering.h"
     21 #include "ARMMachineFunctionInfo.h"
     22 #include "ARMSubtarget.h"
     23 #include "MCTargetDesc/ARMAddressingModes.h"
     24 #include "llvm/ADT/STLExtras.h"
     25 #include "llvm/CodeGen/Analysis.h"
     26 #include "llvm/CodeGen/FastISel.h"
     27 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     28 #include "llvm/CodeGen/MachineConstantPool.h"
     29 #include "llvm/CodeGen/MachineFrameInfo.h"
     30 #include "llvm/CodeGen/MachineInstrBuilder.h"
     31 #include "llvm/CodeGen/MachineMemOperand.h"
     32 #include "llvm/CodeGen/MachineModuleInfo.h"
     33 #include "llvm/CodeGen/MachineRegisterInfo.h"
     34 #include "llvm/IR/CallSite.h"
     35 #include "llvm/IR/CallingConv.h"
     36 #include "llvm/IR/DataLayout.h"
     37 #include "llvm/IR/DerivedTypes.h"
     38 #include "llvm/IR/GetElementPtrTypeIterator.h"
     39 #include "llvm/IR/GlobalVariable.h"
     40 #include "llvm/IR/Instructions.h"
     41 #include "llvm/IR/IntrinsicInst.h"
     42 #include "llvm/IR/Module.h"
     43 #include "llvm/IR/Operator.h"
     44 #include "llvm/Support/CommandLine.h"
     45 #include "llvm/Support/ErrorHandling.h"
     46 #include "llvm/Target/TargetInstrInfo.h"
     47 #include "llvm/Target/TargetLowering.h"
     48 #include "llvm/Target/TargetMachine.h"
     49 #include "llvm/Target/TargetOptions.h"
     50 using namespace llvm;
     51 
     52 extern cl::opt<bool> EnableARMLongCalls;
     53 
     54 namespace {
     55 
     56   // All possible address modes, plus some.
     57   typedef struct Address {
     58     enum {
     59       RegBase,
     60       FrameIndexBase
     61     } BaseType;
     62 
     63     union {
     64       unsigned Reg;
     65       int FI;
     66     } Base;
     67 
     68     int Offset;
     69 
     70     // Innocuous defaults for our address.
     71     Address()
     72      : BaseType(RegBase), Offset(0) {
     73        Base.Reg = 0;
     74      }
     75   } Address;
     76 
     77 class ARMFastISel final : public FastISel {
     78 
     79   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
     80   /// make the right decision when generating code for different targets.
     81   const ARMSubtarget *Subtarget;
     82   Module &M;
     83   const TargetMachine &TM;
     84   const TargetInstrInfo &TII;
     85   const TargetLowering &TLI;
     86   ARMFunctionInfo *AFI;
     87 
     88   // Convenience variables to avoid some queries.
     89   bool isThumb2;
     90   LLVMContext *Context;
     91 
     92   public:
     93     explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
     94                          const TargetLibraryInfo *libInfo)
     95     : FastISel(funcInfo, libInfo),
     96       M(const_cast<Module&>(*funcInfo.Fn->getParent())),
     97       TM(funcInfo.MF->getTarget()),
     98       TII(*TM.getInstrInfo()),
     99       TLI(*TM.getTargetLowering()) {
    100       Subtarget = &TM.getSubtarget<ARMSubtarget>();
    101       AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
    102       isThumb2 = AFI->isThumbFunction();
    103       Context = &funcInfo.Fn->getContext();
    104     }
    105 
    106     // Code from FastISel.cpp.
    107   private:
    108     unsigned FastEmitInst_r(unsigned MachineInstOpcode,
    109                             const TargetRegisterClass *RC,
    110                             unsigned Op0, bool Op0IsKill);
    111     unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
    112                              const TargetRegisterClass *RC,
    113                              unsigned Op0, bool Op0IsKill,
    114                              unsigned Op1, bool Op1IsKill);
    115     unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
    116                               const TargetRegisterClass *RC,
    117                               unsigned Op0, bool Op0IsKill,
    118                               unsigned Op1, bool Op1IsKill,
    119                               unsigned Op2, bool Op2IsKill);
    120     unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
    121                              const TargetRegisterClass *RC,
    122                              unsigned Op0, bool Op0IsKill,
    123                              uint64_t Imm);
    124     unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
    125                               const TargetRegisterClass *RC,
    126                               unsigned Op0, bool Op0IsKill,
    127                               unsigned Op1, bool Op1IsKill,
    128                               uint64_t Imm);
    129     unsigned FastEmitInst_i(unsigned MachineInstOpcode,
    130                             const TargetRegisterClass *RC,
    131                             uint64_t Imm);
    132 
    133     // Backend specific FastISel code.
    134   private:
    135     bool TargetSelectInstruction(const Instruction *I) override;
    136     unsigned TargetMaterializeConstant(const Constant *C) override;
    137     unsigned TargetMaterializeAlloca(const AllocaInst *AI) override;
    138     bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
    139                              const LoadInst *LI) override;
    140     bool FastLowerArguments() override;
    141   private:
    142   #include "ARMGenFastISel.inc"
    143 
    144     // Instruction selection routines.
    145   private:
    146     bool SelectLoad(const Instruction *I);
    147     bool SelectStore(const Instruction *I);
    148     bool SelectBranch(const Instruction *I);
    149     bool SelectIndirectBr(const Instruction *I);
    150     bool SelectCmp(const Instruction *I);
    151     bool SelectFPExt(const Instruction *I);
    152     bool SelectFPTrunc(const Instruction *I);
    153     bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    154     bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    155     bool SelectIToFP(const Instruction *I, bool isSigned);
    156     bool SelectFPToI(const Instruction *I, bool isSigned);
    157     bool SelectDiv(const Instruction *I, bool isSigned);
    158     bool SelectRem(const Instruction *I, bool isSigned);
    159     bool SelectCall(const Instruction *I, const char *IntrMemName);
    160     bool SelectIntrinsicCall(const IntrinsicInst &I);
    161     bool SelectSelect(const Instruction *I);
    162     bool SelectRet(const Instruction *I);
    163     bool SelectTrunc(const Instruction *I);
    164     bool SelectIntExt(const Instruction *I);
    165     bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
    166 
    167     // Utility routines.
    168   private:
    169     bool isTypeLegal(Type *Ty, MVT &VT);
    170     bool isLoadTypeLegal(Type *Ty, MVT &VT);
    171     bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
    172                     bool isZExt);
    173     bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    174                      unsigned Alignment = 0, bool isZExt = true,
    175                      bool allocReg = true);
    176     bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
    177                       unsigned Alignment = 0);
    178     bool ARMComputeAddress(const Value *Obj, Address &Addr);
    179     void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    180     bool ARMIsMemCpySmall(uint64_t Len);
    181     bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
    182                                unsigned Alignment);
    183     unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    184     unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    185     unsigned ARMMaterializeInt(const Constant *C, MVT VT);
    186     unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    187     unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
    188     unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
    189     unsigned ARMSelectCallOp(bool UseReg);
    190     unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
    191 
    192     const TargetLowering *getTargetLowering() { return TM.getTargetLowering(); }
    193 
    194     // Call handling routines.
    195   private:
    196     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
    197                                   bool Return,
    198                                   bool isVarArg);
    199     bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
    200                          SmallVectorImpl<unsigned> &ArgRegs,
    201                          SmallVectorImpl<MVT> &ArgVTs,
    202                          SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
    203                          SmallVectorImpl<unsigned> &RegArgs,
    204                          CallingConv::ID CC,
    205                          unsigned &NumBytes,
    206                          bool isVarArg);
    207     unsigned getLibcallReg(const Twine &Name);
    208     bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
    209                     const Instruction *I, CallingConv::ID CC,
    210                     unsigned &NumBytes, bool isVarArg);
    211     bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
    212 
    213     // OptionalDef handling routines.
    214   private:
    215     bool isARMNEONPred(const MachineInstr *MI);
    216     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    217     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    218     void AddLoadStoreOperands(MVT VT, Address &Addr,
    219                               const MachineInstrBuilder &MIB,
    220                               unsigned Flags, bool useAM3);
    221 };
    222 
    223 } // end anonymous namespace
    224 
    225 #include "ARMGenCallingConv.inc"
    226 
    227 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
    228 // we don't care about implicit defs here, just places we'll need to add a
    229 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
    230 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
    231   if (!MI->hasOptionalDef())
    232     return false;
    233 
    234   // Look to see if our OptionalDef is defining CPSR or CCR.
    235   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    236     const MachineOperand &MO = MI->getOperand(i);
    237     if (!MO.isReg() || !MO.isDef()) continue;
    238     if (MO.getReg() == ARM::CPSR)
    239       *CPSR = true;
    240   }
    241   return true;
    242 }
    243 
    244 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
    245   const MCInstrDesc &MCID = MI->getDesc();
    246 
    247   // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
    248   if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
    249        AFI->isThumb2Function())
    250     return MI->isPredicable();
    251 
    252   for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    253     if (MCID.OpInfo[i].isPredicate())
    254       return true;
    255 
    256   return false;
    257 }
    258 
    259 // If the machine is predicable go ahead and add the predicate operands, if
    260 // it needs default CC operands add those.
    261 // TODO: If we want to support thumb1 then we'll need to deal with optional
    262 // CPSR defs that need to be added before the remaining operands. See s_cc_out
    263 // for descriptions why.
    264 const MachineInstrBuilder &
    265 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
    266   MachineInstr *MI = &*MIB;
    267 
    268   // Do we use a predicate? or...
    269   // Are we NEON in ARM mode and have a predicate operand? If so, I know
    270   // we're not predicable but add it anyways.
    271   if (isARMNEONPred(MI))
    272     AddDefaultPred(MIB);
    273 
    274   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
    275   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
    276   bool CPSR = false;
    277   if (DefinesOptionalPredicate(MI, &CPSR)) {
    278     if (CPSR)
    279       AddDefaultT1CC(MIB);
    280     else
    281       AddDefaultCC(MIB);
    282   }
    283   return MIB;
    284 }
    285 
    286 unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
    287                                      const TargetRegisterClass *RC,
    288                                      unsigned Op0, bool Op0IsKill) {
    289   unsigned ResultReg = createResultReg(RC);
    290   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    291 
    292   // Make sure the input operand is sufficiently constrained to be legal
    293   // for this instruction.
    294   Op0 = constrainOperandRegClass(II, Op0, 1);
    295   if (II.getNumDefs() >= 1) {
    296     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
    297                             ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
    298   } else {
    299     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    300                    .addReg(Op0, Op0IsKill * RegState::Kill));
    301     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    302                    TII.get(TargetOpcode::COPY), ResultReg)
    303                    .addReg(II.ImplicitDefs[0]));
    304   }
    305   return ResultReg;
    306 }
    307 
    308 unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
    309                                       const TargetRegisterClass *RC,
    310                                       unsigned Op0, bool Op0IsKill,
    311                                       unsigned Op1, bool Op1IsKill) {
    312   unsigned ResultReg = createResultReg(RC);
    313   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    314 
    315   // Make sure the input operands are sufficiently constrained to be legal
    316   // for this instruction.
    317   Op0 = constrainOperandRegClass(II, Op0, 1);
    318   Op1 = constrainOperandRegClass(II, Op1, 2);
    319 
    320   if (II.getNumDefs() >= 1) {
    321     AddOptionalDefs(
    322         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    323             .addReg(Op0, Op0IsKill * RegState::Kill)
    324             .addReg(Op1, Op1IsKill * RegState::Kill));
    325   } else {
    326     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    327                    .addReg(Op0, Op0IsKill * RegState::Kill)
    328                    .addReg(Op1, Op1IsKill * RegState::Kill));
    329     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    330                            TII.get(TargetOpcode::COPY), ResultReg)
    331                    .addReg(II.ImplicitDefs[0]));
    332   }
    333   return ResultReg;
    334 }
    335 
    336 unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
    337                                        const TargetRegisterClass *RC,
    338                                        unsigned Op0, bool Op0IsKill,
    339                                        unsigned Op1, bool Op1IsKill,
    340                                        unsigned Op2, bool Op2IsKill) {
    341   unsigned ResultReg = createResultReg(RC);
    342   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    343 
    344   // Make sure the input operands are sufficiently constrained to be legal
    345   // for this instruction.
    346   Op0 = constrainOperandRegClass(II, Op0, 1);
    347   Op1 = constrainOperandRegClass(II, Op1, 2);
    348   Op2 = constrainOperandRegClass(II, Op1, 3);
    349 
    350   if (II.getNumDefs() >= 1) {
    351     AddOptionalDefs(
    352         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    353             .addReg(Op0, Op0IsKill * RegState::Kill)
    354             .addReg(Op1, Op1IsKill * RegState::Kill)
    355             .addReg(Op2, Op2IsKill * RegState::Kill));
    356   } else {
    357     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    358                    .addReg(Op0, Op0IsKill * RegState::Kill)
    359                    .addReg(Op1, Op1IsKill * RegState::Kill)
    360                    .addReg(Op2, Op2IsKill * RegState::Kill));
    361     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    362                            TII.get(TargetOpcode::COPY), ResultReg)
    363                    .addReg(II.ImplicitDefs[0]));
    364   }
    365   return ResultReg;
    366 }
    367 
    368 unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
    369                                       const TargetRegisterClass *RC,
    370                                       unsigned Op0, bool Op0IsKill,
    371                                       uint64_t Imm) {
    372   unsigned ResultReg = createResultReg(RC);
    373   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    374 
    375   // Make sure the input operand is sufficiently constrained to be legal
    376   // for this instruction.
    377   Op0 = constrainOperandRegClass(II, Op0, 1);
    378   if (II.getNumDefs() >= 1) {
    379     AddOptionalDefs(
    380         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    381             .addReg(Op0, Op0IsKill * RegState::Kill)
    382             .addImm(Imm));
    383   } else {
    384     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    385                    .addReg(Op0, Op0IsKill * RegState::Kill)
    386                    .addImm(Imm));
    387     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    388                            TII.get(TargetOpcode::COPY), ResultReg)
    389                    .addReg(II.ImplicitDefs[0]));
    390   }
    391   return ResultReg;
    392 }
    393 
    394 unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
    395                                        const TargetRegisterClass *RC,
    396                                        unsigned Op0, bool Op0IsKill,
    397                                        unsigned Op1, bool Op1IsKill,
    398                                        uint64_t Imm) {
    399   unsigned ResultReg = createResultReg(RC);
    400   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    401 
    402   // Make sure the input operands are sufficiently constrained to be legal
    403   // for this instruction.
    404   Op0 = constrainOperandRegClass(II, Op0, 1);
    405   Op1 = constrainOperandRegClass(II, Op1, 2);
    406   if (II.getNumDefs() >= 1) {
    407     AddOptionalDefs(
    408         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    409             .addReg(Op0, Op0IsKill * RegState::Kill)
    410             .addReg(Op1, Op1IsKill * RegState::Kill)
    411             .addImm(Imm));
    412   } else {
    413     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    414                    .addReg(Op0, Op0IsKill * RegState::Kill)
    415                    .addReg(Op1, Op1IsKill * RegState::Kill)
    416                    .addImm(Imm));
    417     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    418                            TII.get(TargetOpcode::COPY), ResultReg)
    419                    .addReg(II.ImplicitDefs[0]));
    420   }
    421   return ResultReg;
    422 }
    423 
    424 unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
    425                                      const TargetRegisterClass *RC,
    426                                      uint64_t Imm) {
    427   unsigned ResultReg = createResultReg(RC);
    428   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    429 
    430   if (II.getNumDefs() >= 1) {
    431     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
    432                             ResultReg).addImm(Imm));
    433   } else {
    434     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    435                    .addImm(Imm));
    436     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    437                            TII.get(TargetOpcode::COPY), ResultReg)
    438                    .addReg(II.ImplicitDefs[0]));
    439   }
    440   return ResultReg;
    441 }
    442 
    443 // TODO: Don't worry about 64-bit now, but when this is fixed remove the
    444 // checks from the various callers.
    445 unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
    446   if (VT == MVT::f64) return 0;
    447 
    448   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
    449   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    450                           TII.get(ARM::VMOVSR), MoveReg)
    451                   .addReg(SrcReg));
    452   return MoveReg;
    453 }
    454 
    455 unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
    456   if (VT == MVT::i64) return 0;
    457 
    458   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
    459   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    460                           TII.get(ARM::VMOVRS), MoveReg)
    461                   .addReg(SrcReg));
    462   return MoveReg;
    463 }
    464 
    465 // For double width floating point we need to materialize two constants
    466 // (the high and the low) into integer registers then use a move to get
    467 // the combined constant into an FP reg.
    468 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
    469   const APFloat Val = CFP->getValueAPF();
    470   bool is64bit = VT == MVT::f64;
    471 
    472   // This checks to see if we can use VFP3 instructions to materialize
    473   // a constant, otherwise we have to go through the constant pool.
    474   if (TLI.isFPImmLegal(Val, VT)) {
    475     int Imm;
    476     unsigned Opc;
    477     if (is64bit) {
    478       Imm = ARM_AM::getFP64Imm(Val);
    479       Opc = ARM::FCONSTD;
    480     } else {
    481       Imm = ARM_AM::getFP32Imm(Val);
    482       Opc = ARM::FCONSTS;
    483     }
    484     unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    485     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    486                             TII.get(Opc), DestReg).addImm(Imm));
    487     return DestReg;
    488   }
    489 
    490   // Require VFP2 for loading fp constants.
    491   if (!Subtarget->hasVFP2()) return false;
    492 
    493   // MachineConstantPool wants an explicit alignment.
    494   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
    495   if (Align == 0) {
    496     // TODO: Figure out if this is correct.
    497     Align = DL.getTypeAllocSize(CFP->getType());
    498   }
    499   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
    500   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    501   unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
    502 
    503   // The extra reg is for addrmode5.
    504   AddOptionalDefs(
    505       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    506           .addConstantPoolIndex(Idx)
    507           .addReg(0));
    508   return DestReg;
    509 }
    510 
    511 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
    512 
    513   if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    514     return false;
    515 
    516   // If we can do this in a single instruction without a constant pool entry
    517   // do so now.
    518   const ConstantInt *CI = cast<ConstantInt>(C);
    519   if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    520     unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    521     const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
    522       &ARM::GPRRegClass;
    523     unsigned ImmReg = createResultReg(RC);
    524     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    525                             TII.get(Opc), ImmReg)
    526                     .addImm(CI->getZExtValue()));
    527     return ImmReg;
    528   }
    529 
    530   // Use MVN to emit negative constants.
    531   if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    532     unsigned Imm = (unsigned)~(CI->getSExtValue());
    533     bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
    534       (ARM_AM::getSOImmVal(Imm) != -1);
    535     if (UseImm) {
    536       unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
    537       unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32));
    538       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    539                               TII.get(Opc), ImmReg)
    540                       .addImm(Imm));
    541       return ImmReg;
    542     }
    543   }
    544 
    545   // Load from constant pool.  For now 32-bit only.
    546   if (VT != MVT::i32)
    547     return false;
    548 
    549   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    550 
    551   // MachineConstantPool wants an explicit alignment.
    552   unsigned Align = DL.getPrefTypeAlignment(C->getType());
    553   if (Align == 0) {
    554     // TODO: Figure out if this is correct.
    555     Align = DL.getTypeAllocSize(C->getType());
    556   }
    557   unsigned Idx = MCP.getConstantPoolIndex(C, Align);
    558 
    559   if (isThumb2)
    560     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    561                             TII.get(ARM::t2LDRpci), DestReg)
    562                     .addConstantPoolIndex(Idx));
    563   else {
    564     // The extra immediate is for addrmode2.
    565     DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
    566     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    567                             TII.get(ARM::LDRcp), DestReg)
    568                     .addConstantPoolIndex(Idx)
    569                     .addImm(0));
    570   }
    571 
    572   return DestReg;
    573 }
    574 
    575 unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
    576   // For now 32-bit only.
    577   if (VT != MVT::i32) return 0;
    578 
    579   Reloc::Model RelocM = TM.getRelocationModel();
    580   bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
    581   const TargetRegisterClass *RC = isThumb2 ?
    582     (const TargetRegisterClass*)&ARM::rGPRRegClass :
    583     (const TargetRegisterClass*)&ARM::GPRRegClass;
    584   unsigned DestReg = createResultReg(RC);
    585 
    586   // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
    587   const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
    588   bool IsThreadLocal = GVar && GVar->isThreadLocal();
    589   if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
    590 
    591   // Use movw+movt when possible, it avoids constant pool entries.
    592   // Non-darwin targets only support static movt relocations in FastISel.
    593   if (Subtarget->useMovt(*FuncInfo.MF) &&
    594       (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
    595     unsigned Opc;
    596     unsigned char TF = 0;
    597     if (Subtarget->isTargetMachO())
    598       TF = ARMII::MO_NONLAZY;
    599 
    600     switch (RelocM) {
    601     case Reloc::PIC_:
    602       Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    603       break;
    604     default:
    605       Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    606       break;
    607     }
    608     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    609                             TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
    610   } else {
    611     // MachineConstantPool wants an explicit alignment.
    612     unsigned Align = DL.getPrefTypeAlignment(GV->getType());
    613     if (Align == 0) {
    614       // TODO: Figure out if this is correct.
    615       Align = DL.getTypeAllocSize(GV->getType());
    616     }
    617 
    618     if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
    619       return ARMLowerPICELF(GV, Align, VT);
    620 
    621     // Grab index.
    622     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
    623       (Subtarget->isThumb() ? 4 : 8);
    624     unsigned Id = AFI->createPICLabelUId();
    625     ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
    626                                                                 ARMCP::CPValue,
    627                                                                 PCAdj);
    628     unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
    629 
    630     // Load value.
    631     MachineInstrBuilder MIB;
    632     if (isThumb2) {
    633       unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
    634       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
    635                     DestReg).addConstantPoolIndex(Idx);
    636       if (RelocM == Reloc::PIC_)
    637         MIB.addImm(Id);
    638       AddOptionalDefs(MIB);
    639     } else {
    640       // The extra immediate is for addrmode2.
    641       DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
    642       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    643                     TII.get(ARM::LDRcp), DestReg)
    644                 .addConstantPoolIndex(Idx)
    645                 .addImm(0);
    646       AddOptionalDefs(MIB);
    647 
    648       if (RelocM == Reloc::PIC_) {
    649         unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
    650         unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    651 
    652         MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
    653                                           DbgLoc, TII.get(Opc), NewDestReg)
    654                                   .addReg(DestReg)
    655                                   .addImm(Id);
    656         AddOptionalDefs(MIB);
    657         return NewDestReg;
    658       }
    659     }
    660   }
    661 
    662   if (IsIndirect) {
    663     MachineInstrBuilder MIB;
    664     unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    665     if (isThumb2)
    666       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    667                     TII.get(ARM::t2LDRi12), NewDestReg)
    668             .addReg(DestReg)
    669             .addImm(0);
    670     else
    671       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    672                     TII.get(ARM::LDRi12), NewDestReg)
    673                 .addReg(DestReg)
    674                 .addImm(0);
    675     DestReg = NewDestReg;
    676     AddOptionalDefs(MIB);
    677   }
    678 
    679   return DestReg;
    680 }
    681 
    682 unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
    683   EVT CEVT = TLI.getValueType(C->getType(), true);
    684 
    685   // Only handle simple types.
    686   if (!CEVT.isSimple()) return 0;
    687   MVT VT = CEVT.getSimpleVT();
    688 
    689   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    690     return ARMMaterializeFP(CFP, VT);
    691   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    692     return ARMMaterializeGV(GV, VT);
    693   else if (isa<ConstantInt>(C))
    694     return ARMMaterializeInt(C, VT);
    695 
    696   return 0;
    697 }
    698 
    699 // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
    700 
    701 unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
    702   // Don't handle dynamic allocas.
    703   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
    704 
    705   MVT VT;
    706   if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
    707 
    708   DenseMap<const AllocaInst*, int>::iterator SI =
    709     FuncInfo.StaticAllocaMap.find(AI);
    710 
    711   // This will get lowered later into the correct offsets and registers
    712   // via rewriteXFrameIndex.
    713   if (SI != FuncInfo.StaticAllocaMap.end()) {
    714     unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    715     const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    716     unsigned ResultReg = createResultReg(RC);
    717     ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
    718 
    719     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    720                             TII.get(Opc), ResultReg)
    721                             .addFrameIndex(SI->second)
    722                             .addImm(0));
    723     return ResultReg;
    724   }
    725 
    726   return 0;
    727 }
    728 
    729 bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
    730   EVT evt = TLI.getValueType(Ty, true);
    731 
    732   // Only handle simple types.
    733   if (evt == MVT::Other || !evt.isSimple()) return false;
    734   VT = evt.getSimpleVT();
    735 
    736   // Handle all legal types, i.e. a register that will directly hold this
    737   // value.
    738   return TLI.isTypeLegal(VT);
    739 }
    740 
    741 bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
    742   if (isTypeLegal(Ty, VT)) return true;
    743 
    744   // If this is a type than can be sign or zero-extended to a basic operation
    745   // go ahead and accept it now.
    746   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    747     return true;
    748 
    749   return false;
    750 }
    751 
    752 // Computes the address to get to an object.
    753 bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
    754   // Some boilerplate from the X86 FastISel.
    755   const User *U = nullptr;
    756   unsigned Opcode = Instruction::UserOp1;
    757   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    758     // Don't walk into other basic blocks unless the object is an alloca from
    759     // another block, otherwise it may not have a virtual register assigned.
    760     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
    761         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    762       Opcode = I->getOpcode();
    763       U = I;
    764     }
    765   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    766     Opcode = C->getOpcode();
    767     U = C;
    768   }
    769 
    770   if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    771     if (Ty->getAddressSpace() > 255)
    772       // Fast instruction selection doesn't support the special
    773       // address spaces.
    774       return false;
    775 
    776   switch (Opcode) {
    777     default:
    778     break;
    779     case Instruction::BitCast:
    780       // Look through bitcasts.
    781       return ARMComputeAddress(U->getOperand(0), Addr);
    782     case Instruction::IntToPtr:
    783       // Look past no-op inttoptrs.
    784       if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
    785         return ARMComputeAddress(U->getOperand(0), Addr);
    786       break;
    787     case Instruction::PtrToInt:
    788       // Look past no-op ptrtoints.
    789       if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
    790         return ARMComputeAddress(U->getOperand(0), Addr);
    791       break;
    792     case Instruction::GetElementPtr: {
    793       Address SavedAddr = Addr;
    794       int TmpOffset = Addr.Offset;
    795 
    796       // Iterate through the GEP folding the constants into offsets where
    797       // we can.
    798       gep_type_iterator GTI = gep_type_begin(U);
    799       for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
    800            i != e; ++i, ++GTI) {
    801         const Value *Op = *i;
    802         if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    803           const StructLayout *SL = DL.getStructLayout(STy);
    804           unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
    805           TmpOffset += SL->getElementOffset(Idx);
    806         } else {
    807           uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
    808           for (;;) {
    809             if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    810               // Constant-offset addressing.
    811               TmpOffset += CI->getSExtValue() * S;
    812               break;
    813             }
    814             if (canFoldAddIntoGEP(U, Op)) {
    815               // A compatible add with a constant operand. Fold the constant.
    816               ConstantInt *CI =
    817               cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    818               TmpOffset += CI->getSExtValue() * S;
    819               // Iterate on the other operand.
    820               Op = cast<AddOperator>(Op)->getOperand(0);
    821               continue;
    822             }
    823             // Unsupported
    824             goto unsupported_gep;
    825           }
    826         }
    827       }
    828 
    829       // Try to grab the base operand now.
    830       Addr.Offset = TmpOffset;
    831       if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
    832 
    833       // We failed, restore everything and try the other options.
    834       Addr = SavedAddr;
    835 
    836       unsupported_gep:
    837       break;
    838     }
    839     case Instruction::Alloca: {
    840       const AllocaInst *AI = cast<AllocaInst>(Obj);
    841       DenseMap<const AllocaInst*, int>::iterator SI =
    842         FuncInfo.StaticAllocaMap.find(AI);
    843       if (SI != FuncInfo.StaticAllocaMap.end()) {
    844         Addr.BaseType = Address::FrameIndexBase;
    845         Addr.Base.FI = SI->second;
    846         return true;
    847       }
    848       break;
    849     }
    850   }
    851 
    852   // Try to get this in a register if nothing else has worked.
    853   if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
    854   return Addr.Base.Reg != 0;
    855 }
    856 
    857 void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
    858   bool needsLowering = false;
    859   switch (VT.SimpleTy) {
    860     default: llvm_unreachable("Unhandled load/store type!");
    861     case MVT::i1:
    862     case MVT::i8:
    863     case MVT::i16:
    864     case MVT::i32:
    865       if (!useAM3) {
    866         // Integer loads/stores handle 12-bit offsets.
    867         needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
    868         // Handle negative offsets.
    869         if (needsLowering && isThumb2)
    870           needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
    871                             Addr.Offset > -256);
    872       } else {
    873         // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
    874         needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
    875       }
    876       break;
    877     case MVT::f32:
    878     case MVT::f64:
    879       // Floating point operands handle 8-bit offsets.
    880       needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
    881       break;
    882   }
    883 
    884   // If this is a stack pointer and the offset needs to be simplified then
    885   // put the alloca address into a register, set the base type back to
    886   // register and continue. This should almost never happen.
    887   if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    888     const TargetRegisterClass *RC = isThumb2 ?
    889       (const TargetRegisterClass*)&ARM::tGPRRegClass :
    890       (const TargetRegisterClass*)&ARM::GPRRegClass;
    891     unsigned ResultReg = createResultReg(RC);
    892     unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    893     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    894                             TII.get(Opc), ResultReg)
    895                             .addFrameIndex(Addr.Base.FI)
    896                             .addImm(0));
    897     Addr.Base.Reg = ResultReg;
    898     Addr.BaseType = Address::RegBase;
    899   }
    900 
    901   // Since the offset is too large for the load/store instruction
    902   // get the reg+offset into a register.
    903   if (needsLowering) {
    904     Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
    905                                  /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    906     Addr.Offset = 0;
    907   }
    908 }
    909 
    910 void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
    911                                        const MachineInstrBuilder &MIB,
    912                                        unsigned Flags, bool useAM3) {
    913   // addrmode5 output depends on the selection dag addressing dividing the
    914   // offset by 4 that it then later multiplies. Do this here as well.
    915   if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    916     Addr.Offset /= 4;
    917 
    918   // Frame base works a bit differently. Handle it separately.
    919   if (Addr.BaseType == Address::FrameIndexBase) {
    920     int FI = Addr.Base.FI;
    921     int Offset = Addr.Offset;
    922     MachineMemOperand *MMO =
    923           FuncInfo.MF->getMachineMemOperand(
    924                                   MachinePointerInfo::getFixedStack(FI, Offset),
    925                                   Flags,
    926                                   MFI.getObjectSize(FI),
    927                                   MFI.getObjectAlignment(FI));
    928     // Now add the rest of the operands.
    929     MIB.addFrameIndex(FI);
    930 
    931     // ARM halfword load/stores and signed byte loads need an additional
    932     // operand.
    933     if (useAM3) {
    934       signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
    935       MIB.addReg(0);
    936       MIB.addImm(Imm);
    937     } else {
    938       MIB.addImm(Addr.Offset);
    939     }
    940     MIB.addMemOperand(MMO);
    941   } else {
    942     // Now add the rest of the operands.
    943     MIB.addReg(Addr.Base.Reg);
    944 
    945     // ARM halfword load/stores and signed byte loads need an additional
    946     // operand.
    947     if (useAM3) {
    948       signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
    949       MIB.addReg(0);
    950       MIB.addImm(Imm);
    951     } else {
    952       MIB.addImm(Addr.Offset);
    953     }
    954   }
    955   AddOptionalDefs(MIB);
    956 }
    957 
    958 bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    959                               unsigned Alignment, bool isZExt, bool allocReg) {
    960   unsigned Opc;
    961   bool useAM3 = false;
    962   bool needVMOV = false;
    963   const TargetRegisterClass *RC;
    964   switch (VT.SimpleTy) {
    965     // This is mostly going to be Neon/vector support.
    966     default: return false;
    967     case MVT::i1:
    968     case MVT::i8:
    969       if (isThumb2) {
    970         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
    971           Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
    972         else
    973           Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
    974       } else {
    975         if (isZExt) {
    976           Opc = ARM::LDRBi12;
    977         } else {
    978           Opc = ARM::LDRSB;
    979           useAM3 = true;
    980         }
    981       }
    982       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    983       break;
    984     case MVT::i16:
    985       if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
    986         return false;
    987 
    988       if (isThumb2) {
    989         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
    990           Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
    991         else
    992           Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
    993       } else {
    994         Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
    995         useAM3 = true;
    996       }
    997       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    998       break;
    999     case MVT::i32:
   1000       if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
   1001         return false;
   1002 
   1003       if (isThumb2) {
   1004         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1005           Opc = ARM::t2LDRi8;
   1006         else
   1007           Opc = ARM::t2LDRi12;
   1008       } else {
   1009         Opc = ARM::LDRi12;
   1010       }
   1011       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
   1012       break;
   1013     case MVT::f32:
   1014       if (!Subtarget->hasVFP2()) return false;
   1015       // Unaligned loads need special handling. Floats require word-alignment.
   1016       if (Alignment && Alignment < 4) {
   1017         needVMOV = true;
   1018         VT = MVT::i32;
   1019         Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
   1020         RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
   1021       } else {
   1022         Opc = ARM::VLDRS;
   1023         RC = TLI.getRegClassFor(VT);
   1024       }
   1025       break;
   1026     case MVT::f64:
   1027       if (!Subtarget->hasVFP2()) return false;
   1028       // FIXME: Unaligned loads need special handling.  Doublewords require
   1029       // word-alignment.
   1030       if (Alignment && Alignment < 4)
   1031         return false;
   1032 
   1033       Opc = ARM::VLDRD;
   1034       RC = TLI.getRegClassFor(VT);
   1035       break;
   1036   }
   1037   // Simplify this down to something we can handle.
   1038   ARMSimplifyAddress(Addr, VT, useAM3);
   1039 
   1040   // Create the base instruction, then add the operands.
   1041   if (allocReg)
   1042     ResultReg = createResultReg(RC);
   1043   assert (ResultReg > 255 && "Expected an allocated virtual register.");
   1044   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1045                                     TII.get(Opc), ResultReg);
   1046   AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
   1047 
   1048   // If we had an unaligned load of a float we've converted it to an regular
   1049   // load.  Now we must move from the GRP to the FP register.
   1050   if (needVMOV) {
   1051     unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
   1052     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1053                             TII.get(ARM::VMOVSR), MoveReg)
   1054                     .addReg(ResultReg));
   1055     ResultReg = MoveReg;
   1056   }
   1057   return true;
   1058 }
   1059 
   1060 bool ARMFastISel::SelectLoad(const Instruction *I) {
   1061   // Atomic loads need special handling.
   1062   if (cast<LoadInst>(I)->isAtomic())
   1063     return false;
   1064 
   1065   // Verify we have a legal type before going any further.
   1066   MVT VT;
   1067   if (!isLoadTypeLegal(I->getType(), VT))
   1068     return false;
   1069 
   1070   // See if we can handle this address.
   1071   Address Addr;
   1072   if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
   1073 
   1074   unsigned ResultReg;
   1075   if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
   1076     return false;
   1077   UpdateValueMap(I, ResultReg);
   1078   return true;
   1079 }
   1080 
   1081 bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
   1082                                unsigned Alignment) {
   1083   unsigned StrOpc;
   1084   bool useAM3 = false;
   1085   switch (VT.SimpleTy) {
   1086     // This is mostly going to be Neon/vector support.
   1087     default: return false;
   1088     case MVT::i1: {
   1089       unsigned Res = createResultReg(isThumb2 ?
   1090         (const TargetRegisterClass*)&ARM::tGPRRegClass :
   1091         (const TargetRegisterClass*)&ARM::GPRRegClass);
   1092       unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
   1093       SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
   1094       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1095                               TII.get(Opc), Res)
   1096                       .addReg(SrcReg).addImm(1));
   1097       SrcReg = Res;
   1098     } // Fallthrough here.
   1099     case MVT::i8:
   1100       if (isThumb2) {
   1101         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1102           StrOpc = ARM::t2STRBi8;
   1103         else
   1104           StrOpc = ARM::t2STRBi12;
   1105       } else {
   1106         StrOpc = ARM::STRBi12;
   1107       }
   1108       break;
   1109     case MVT::i16:
   1110       if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
   1111         return false;
   1112 
   1113       if (isThumb2) {
   1114         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1115           StrOpc = ARM::t2STRHi8;
   1116         else
   1117           StrOpc = ARM::t2STRHi12;
   1118       } else {
   1119         StrOpc = ARM::STRH;
   1120         useAM3 = true;
   1121       }
   1122       break;
   1123     case MVT::i32:
   1124       if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
   1125         return false;
   1126 
   1127       if (isThumb2) {
   1128         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1129           StrOpc = ARM::t2STRi8;
   1130         else
   1131           StrOpc = ARM::t2STRi12;
   1132       } else {
   1133         StrOpc = ARM::STRi12;
   1134       }
   1135       break;
   1136     case MVT::f32:
   1137       if (!Subtarget->hasVFP2()) return false;
   1138       // Unaligned stores need special handling. Floats require word-alignment.
   1139       if (Alignment && Alignment < 4) {
   1140         unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
   1141         AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1142                                 TII.get(ARM::VMOVRS), MoveReg)
   1143                         .addReg(SrcReg));
   1144         SrcReg = MoveReg;
   1145         VT = MVT::i32;
   1146         StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
   1147       } else {
   1148         StrOpc = ARM::VSTRS;
   1149       }
   1150       break;
   1151     case MVT::f64:
   1152       if (!Subtarget->hasVFP2()) return false;
   1153       // FIXME: Unaligned stores need special handling.  Doublewords require
   1154       // word-alignment.
   1155       if (Alignment && Alignment < 4)
   1156           return false;
   1157 
   1158       StrOpc = ARM::VSTRD;
   1159       break;
   1160   }
   1161   // Simplify this down to something we can handle.
   1162   ARMSimplifyAddress(Addr, VT, useAM3);
   1163 
   1164   // Create the base instruction, then add the operands.
   1165   SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
   1166   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1167                                     TII.get(StrOpc))
   1168                             .addReg(SrcReg);
   1169   AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
   1170   return true;
   1171 }
   1172 
   1173 bool ARMFastISel::SelectStore(const Instruction *I) {
   1174   Value *Op0 = I->getOperand(0);
   1175   unsigned SrcReg = 0;
   1176 
   1177   // Atomic stores need special handling.
   1178   if (cast<StoreInst>(I)->isAtomic())
   1179     return false;
   1180 
   1181   // Verify we have a legal type before going any further.
   1182   MVT VT;
   1183   if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
   1184     return false;
   1185 
   1186   // Get the value to be stored into a register.
   1187   SrcReg = getRegForValue(Op0);
   1188   if (SrcReg == 0) return false;
   1189 
   1190   // See if we can handle this address.
   1191   Address Addr;
   1192   if (!ARMComputeAddress(I->getOperand(1), Addr))
   1193     return false;
   1194 
   1195   if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
   1196     return false;
   1197   return true;
   1198 }
   1199 
   1200 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
   1201   switch (Pred) {
   1202     // Needs two compares...
   1203     case CmpInst::FCMP_ONE:
   1204     case CmpInst::FCMP_UEQ:
   1205     default:
   1206       // AL is our "false" for now. The other two need more compares.
   1207       return ARMCC::AL;
   1208     case CmpInst::ICMP_EQ:
   1209     case CmpInst::FCMP_OEQ:
   1210       return ARMCC::EQ;
   1211     case CmpInst::ICMP_SGT:
   1212     case CmpInst::FCMP_OGT:
   1213       return ARMCC::GT;
   1214     case CmpInst::ICMP_SGE:
   1215     case CmpInst::FCMP_OGE:
   1216       return ARMCC::GE;
   1217     case CmpInst::ICMP_UGT:
   1218     case CmpInst::FCMP_UGT:
   1219       return ARMCC::HI;
   1220     case CmpInst::FCMP_OLT:
   1221       return ARMCC::MI;
   1222     case CmpInst::ICMP_ULE:
   1223     case CmpInst::FCMP_OLE:
   1224       return ARMCC::LS;
   1225     case CmpInst::FCMP_ORD:
   1226       return ARMCC::VC;
   1227     case CmpInst::FCMP_UNO:
   1228       return ARMCC::VS;
   1229     case CmpInst::FCMP_UGE:
   1230       return ARMCC::PL;
   1231     case CmpInst::ICMP_SLT:
   1232     case CmpInst::FCMP_ULT:
   1233       return ARMCC::LT;
   1234     case CmpInst::ICMP_SLE:
   1235     case CmpInst::FCMP_ULE:
   1236       return ARMCC::LE;
   1237     case CmpInst::FCMP_UNE:
   1238     case CmpInst::ICMP_NE:
   1239       return ARMCC::NE;
   1240     case CmpInst::ICMP_UGE:
   1241       return ARMCC::HS;
   1242     case CmpInst::ICMP_ULT:
   1243       return ARMCC::LO;
   1244   }
   1245 }
   1246 
   1247 bool ARMFastISel::SelectBranch(const Instruction *I) {
   1248   const BranchInst *BI = cast<BranchInst>(I);
   1249   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   1250   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
   1251 
   1252   // Simple branch support.
   1253 
   1254   // If we can, avoid recomputing the compare - redoing it could lead to wonky
   1255   // behavior.
   1256   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
   1257     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
   1258 
   1259       // Get the compare predicate.
   1260       // Try to take advantage of fallthrough opportunities.
   1261       CmpInst::Predicate Predicate = CI->getPredicate();
   1262       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   1263         std::swap(TBB, FBB);
   1264         Predicate = CmpInst::getInversePredicate(Predicate);
   1265       }
   1266 
   1267       ARMCC::CondCodes ARMPred = getComparePred(Predicate);
   1268 
   1269       // We may not handle every CC for now.
   1270       if (ARMPred == ARMCC::AL) return false;
   1271 
   1272       // Emit the compare.
   1273       if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
   1274         return false;
   1275 
   1276       unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
   1277       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
   1278       .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
   1279       FastEmitBranch(FBB, DbgLoc);
   1280       FuncInfo.MBB->addSuccessor(TBB);
   1281       return true;
   1282     }
   1283   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
   1284     MVT SourceVT;
   1285     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
   1286         (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
   1287       unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
   1288       unsigned OpReg = getRegForValue(TI->getOperand(0));
   1289       OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
   1290       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1291                               TII.get(TstOpc))
   1292                       .addReg(OpReg).addImm(1));
   1293 
   1294       unsigned CCMode = ARMCC::NE;
   1295       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   1296         std::swap(TBB, FBB);
   1297         CCMode = ARMCC::EQ;
   1298       }
   1299 
   1300       unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
   1301       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
   1302       .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
   1303 
   1304       FastEmitBranch(FBB, DbgLoc);
   1305       FuncInfo.MBB->addSuccessor(TBB);
   1306       return true;
   1307     }
   1308   } else if (const ConstantInt *CI =
   1309              dyn_cast<ConstantInt>(BI->getCondition())) {
   1310     uint64_t Imm = CI->getZExtValue();
   1311     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
   1312     FastEmitBranch(Target, DbgLoc);
   1313     return true;
   1314   }
   1315 
   1316   unsigned CmpReg = getRegForValue(BI->getCondition());
   1317   if (CmpReg == 0) return false;
   1318 
   1319   // We've been divorced from our compare!  Our block was split, and
   1320   // now our compare lives in a predecessor block.  We musn't
   1321   // re-compare here, as the children of the compare aren't guaranteed
   1322   // live across the block boundary (we *could* check for this).
   1323   // Regardless, the compare has been done in the predecessor block,
   1324   // and it left a value for us in a virtual register.  Ergo, we test
   1325   // the one-bit value left in the virtual register.
   1326   unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
   1327   CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
   1328   AddOptionalDefs(
   1329       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
   1330           .addReg(CmpReg)
   1331           .addImm(1));
   1332 
   1333   unsigned CCMode = ARMCC::NE;
   1334   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   1335     std::swap(TBB, FBB);
   1336     CCMode = ARMCC::EQ;
   1337   }
   1338 
   1339   unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
   1340   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
   1341                   .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
   1342   FastEmitBranch(FBB, DbgLoc);
   1343   FuncInfo.MBB->addSuccessor(TBB);
   1344   return true;
   1345 }
   1346 
   1347 bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
   1348   unsigned AddrReg = getRegForValue(I->getOperand(0));
   1349   if (AddrReg == 0) return false;
   1350 
   1351   unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
   1352   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1353                           TII.get(Opc)).addReg(AddrReg));
   1354 
   1355   const IndirectBrInst *IB = cast<IndirectBrInst>(I);
   1356   for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
   1357     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
   1358 
   1359   return true;
   1360 }
   1361 
   1362 bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   1363                              bool isZExt) {
   1364   Type *Ty = Src1Value->getType();
   1365   EVT SrcEVT = TLI.getValueType(Ty, true);
   1366   if (!SrcEVT.isSimple()) return false;
   1367   MVT SrcVT = SrcEVT.getSimpleVT();
   1368 
   1369   bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
   1370   if (isFloat && !Subtarget->hasVFP2())
   1371     return false;
   1372 
   1373   // Check to see if the 2nd operand is a constant that we can encode directly
   1374   // in the compare.
   1375   int Imm = 0;
   1376   bool UseImm = false;
   1377   bool isNegativeImm = false;
   1378   // FIXME: At -O0 we don't have anything that canonicalizes operand order.
   1379   // Thus, Src1Value may be a ConstantInt, but we're missing it.
   1380   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
   1381     if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
   1382         SrcVT == MVT::i1) {
   1383       const APInt &CIVal = ConstInt->getValue();
   1384       Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
   1385       // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
   1386       // then a cmn, because there is no way to represent 2147483648 as a
   1387       // signed 32-bit int.
   1388       if (Imm < 0 && Imm != (int)0x80000000) {
   1389         isNegativeImm = true;
   1390         Imm = -Imm;
   1391       }
   1392       UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
   1393         (ARM_AM::getSOImmVal(Imm) != -1);
   1394     }
   1395   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
   1396     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
   1397       if (ConstFP->isZero() && !ConstFP->isNegative())
   1398         UseImm = true;
   1399   }
   1400 
   1401   unsigned CmpOpc;
   1402   bool isICmp = true;
   1403   bool needsExt = false;
   1404   switch (SrcVT.SimpleTy) {
   1405     default: return false;
   1406     // TODO: Verify compares.
   1407     case MVT::f32:
   1408       isICmp = false;
   1409       CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
   1410       break;
   1411     case MVT::f64:
   1412       isICmp = false;
   1413       CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
   1414       break;
   1415     case MVT::i1:
   1416     case MVT::i8:
   1417     case MVT::i16:
   1418       needsExt = true;
   1419     // Intentional fall-through.
   1420     case MVT::i32:
   1421       if (isThumb2) {
   1422         if (!UseImm)
   1423           CmpOpc = ARM::t2CMPrr;
   1424         else
   1425           CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
   1426       } else {
   1427         if (!UseImm)
   1428           CmpOpc = ARM::CMPrr;
   1429         else
   1430           CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
   1431       }
   1432       break;
   1433   }
   1434 
   1435   unsigned SrcReg1 = getRegForValue(Src1Value);
   1436   if (SrcReg1 == 0) return false;
   1437 
   1438   unsigned SrcReg2 = 0;
   1439   if (!UseImm) {
   1440     SrcReg2 = getRegForValue(Src2Value);
   1441     if (SrcReg2 == 0) return false;
   1442   }
   1443 
   1444   // We have i1, i8, or i16, we need to either zero extend or sign extend.
   1445   if (needsExt) {
   1446     SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
   1447     if (SrcReg1 == 0) return false;
   1448     if (!UseImm) {
   1449       SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
   1450       if (SrcReg2 == 0) return false;
   1451     }
   1452   }
   1453 
   1454   const MCInstrDesc &II = TII.get(CmpOpc);
   1455   SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
   1456   if (!UseImm) {
   1457     SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
   1458     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
   1459                     .addReg(SrcReg1).addReg(SrcReg2));
   1460   } else {
   1461     MachineInstrBuilder MIB;
   1462     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
   1463       .addReg(SrcReg1);
   1464 
   1465     // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
   1466     if (isICmp)
   1467       MIB.addImm(Imm);
   1468     AddOptionalDefs(MIB);
   1469   }
   1470 
   1471   // For floating point we need to move the result to a comparison register
   1472   // that we can then use for branches.
   1473   if (Ty->isFloatTy() || Ty->isDoubleTy())
   1474     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1475                             TII.get(ARM::FMSTAT)));
   1476   return true;
   1477 }
   1478 
   1479 bool ARMFastISel::SelectCmp(const Instruction *I) {
   1480   const CmpInst *CI = cast<CmpInst>(I);
   1481 
   1482   // Get the compare predicate.
   1483   ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
   1484 
   1485   // We may not handle every CC for now.
   1486   if (ARMPred == ARMCC::AL) return false;
   1487 
   1488   // Emit the compare.
   1489   if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
   1490     return false;
   1491 
   1492   // Now set a register based on the comparison. Explicitly set the predicates
   1493   // here.
   1494   unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
   1495   const TargetRegisterClass *RC = isThumb2 ?
   1496     (const TargetRegisterClass*)&ARM::rGPRRegClass :
   1497     (const TargetRegisterClass*)&ARM::GPRRegClass;
   1498   unsigned DestReg = createResultReg(RC);
   1499   Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
   1500   unsigned ZeroReg = TargetMaterializeConstant(Zero);
   1501   // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
   1502   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
   1503           .addReg(ZeroReg).addImm(1)
   1504           .addImm(ARMPred).addReg(ARM::CPSR);
   1505 
   1506   UpdateValueMap(I, DestReg);
   1507   return true;
   1508 }
   1509 
   1510 bool ARMFastISel::SelectFPExt(const Instruction *I) {
   1511   // Make sure we have VFP and that we're extending float to double.
   1512   if (!Subtarget->hasVFP2()) return false;
   1513 
   1514   Value *V = I->getOperand(0);
   1515   if (!I->getType()->isDoubleTy() ||
   1516       !V->getType()->isFloatTy()) return false;
   1517 
   1518   unsigned Op = getRegForValue(V);
   1519   if (Op == 0) return false;
   1520 
   1521   unsigned Result = createResultReg(&ARM::DPRRegClass);
   1522   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1523                           TII.get(ARM::VCVTDS), Result)
   1524                   .addReg(Op));
   1525   UpdateValueMap(I, Result);
   1526   return true;
   1527 }
   1528 
   1529 bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
   1530   // Make sure we have VFP and that we're truncating double to float.
   1531   if (!Subtarget->hasVFP2()) return false;
   1532 
   1533   Value *V = I->getOperand(0);
   1534   if (!(I->getType()->isFloatTy() &&
   1535         V->getType()->isDoubleTy())) return false;
   1536 
   1537   unsigned Op = getRegForValue(V);
   1538   if (Op == 0) return false;
   1539 
   1540   unsigned Result = createResultReg(&ARM::SPRRegClass);
   1541   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1542                           TII.get(ARM::VCVTSD), Result)
   1543                   .addReg(Op));
   1544   UpdateValueMap(I, Result);
   1545   return true;
   1546 }
   1547 
   1548 bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
   1549   // Make sure we have VFP.
   1550   if (!Subtarget->hasVFP2()) return false;
   1551 
   1552   MVT DstVT;
   1553   Type *Ty = I->getType();
   1554   if (!isTypeLegal(Ty, DstVT))
   1555     return false;
   1556 
   1557   Value *Src = I->getOperand(0);
   1558   EVT SrcEVT = TLI.getValueType(Src->getType(), true);
   1559   if (!SrcEVT.isSimple())
   1560     return false;
   1561   MVT SrcVT = SrcEVT.getSimpleVT();
   1562   if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
   1563     return false;
   1564 
   1565   unsigned SrcReg = getRegForValue(Src);
   1566   if (SrcReg == 0) return false;
   1567 
   1568   // Handle sign-extension.
   1569   if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
   1570     SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
   1571                                        /*isZExt*/!isSigned);
   1572     if (SrcReg == 0) return false;
   1573   }
   1574 
   1575   // The conversion routine works on fp-reg to fp-reg and the operand above
   1576   // was an integer, move it to the fp registers if possible.
   1577   unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
   1578   if (FP == 0) return false;
   1579 
   1580   unsigned Opc;
   1581   if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
   1582   else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
   1583   else return false;
   1584 
   1585   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
   1586   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1587                           TII.get(Opc), ResultReg).addReg(FP));
   1588   UpdateValueMap(I, ResultReg);
   1589   return true;
   1590 }
   1591 
   1592 bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
   1593   // Make sure we have VFP.
   1594   if (!Subtarget->hasVFP2()) return false;
   1595 
   1596   MVT DstVT;
   1597   Type *RetTy = I->getType();
   1598   if (!isTypeLegal(RetTy, DstVT))
   1599     return false;
   1600 
   1601   unsigned Op = getRegForValue(I->getOperand(0));
   1602   if (Op == 0) return false;
   1603 
   1604   unsigned Opc;
   1605   Type *OpTy = I->getOperand(0)->getType();
   1606   if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
   1607   else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
   1608   else return false;
   1609 
   1610   // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
   1611   unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
   1612   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1613                           TII.get(Opc), ResultReg).addReg(Op));
   1614 
   1615   // This result needs to be in an integer register, but the conversion only
   1616   // takes place in fp-regs.
   1617   unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
   1618   if (IntReg == 0) return false;
   1619 
   1620   UpdateValueMap(I, IntReg);
   1621   return true;
   1622 }
   1623 
   1624 bool ARMFastISel::SelectSelect(const Instruction *I) {
   1625   MVT VT;
   1626   if (!isTypeLegal(I->getType(), VT))
   1627     return false;
   1628 
   1629   // Things need to be register sized for register moves.
   1630   if (VT != MVT::i32) return false;
   1631 
   1632   unsigned CondReg = getRegForValue(I->getOperand(0));
   1633   if (CondReg == 0) return false;
   1634   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   1635   if (Op1Reg == 0) return false;
   1636 
   1637   // Check to see if we can use an immediate in the conditional move.
   1638   int Imm = 0;
   1639   bool UseImm = false;
   1640   bool isNegativeImm = false;
   1641   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
   1642     assert (VT == MVT::i32 && "Expecting an i32.");
   1643     Imm = (int)ConstInt->getValue().getZExtValue();
   1644     if (Imm < 0) {
   1645       isNegativeImm = true;
   1646       Imm = ~Imm;
   1647     }
   1648     UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
   1649       (ARM_AM::getSOImmVal(Imm) != -1);
   1650   }
   1651 
   1652   unsigned Op2Reg = 0;
   1653   if (!UseImm) {
   1654     Op2Reg = getRegForValue(I->getOperand(2));
   1655     if (Op2Reg == 0) return false;
   1656   }
   1657 
   1658   unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
   1659   CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
   1660   AddOptionalDefs(
   1661       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
   1662           .addReg(CondReg)
   1663           .addImm(0));
   1664 
   1665   unsigned MovCCOpc;
   1666   const TargetRegisterClass *RC;
   1667   if (!UseImm) {
   1668     RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
   1669     MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
   1670   } else {
   1671     RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
   1672     if (!isNegativeImm)
   1673       MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
   1674     else
   1675       MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
   1676   }
   1677   unsigned ResultReg = createResultReg(RC);
   1678   if (!UseImm) {
   1679     Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
   1680     Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
   1681     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
   1682             ResultReg)
   1683         .addReg(Op2Reg)
   1684         .addReg(Op1Reg)
   1685         .addImm(ARMCC::NE)
   1686         .addReg(ARM::CPSR);
   1687   } else {
   1688     Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
   1689     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
   1690             ResultReg)
   1691         .addReg(Op1Reg)
   1692         .addImm(Imm)
   1693         .addImm(ARMCC::EQ)
   1694         .addReg(ARM::CPSR);
   1695   }
   1696   UpdateValueMap(I, ResultReg);
   1697   return true;
   1698 }
   1699 
   1700 bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
   1701   MVT VT;
   1702   Type *Ty = I->getType();
   1703   if (!isTypeLegal(Ty, VT))
   1704     return false;
   1705 
   1706   // If we have integer div support we should have selected this automagically.
   1707   // In case we have a real miss go ahead and return false and we'll pick
   1708   // it up later.
   1709   if (Subtarget->hasDivide()) return false;
   1710 
   1711   // Otherwise emit a libcall.
   1712   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   1713   if (VT == MVT::i8)
   1714     LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
   1715   else if (VT == MVT::i16)
   1716     LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
   1717   else if (VT == MVT::i32)
   1718     LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
   1719   else if (VT == MVT::i64)
   1720     LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
   1721   else if (VT == MVT::i128)
   1722     LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
   1723   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
   1724 
   1725   return ARMEmitLibcall(I, LC);
   1726 }
   1727 
   1728 bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
   1729   MVT VT;
   1730   Type *Ty = I->getType();
   1731   if (!isTypeLegal(Ty, VT))
   1732     return false;
   1733 
   1734   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   1735   if (VT == MVT::i8)
   1736     LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
   1737   else if (VT == MVT::i16)
   1738     LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
   1739   else if (VT == MVT::i32)
   1740     LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
   1741   else if (VT == MVT::i64)
   1742     LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
   1743   else if (VT == MVT::i128)
   1744     LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
   1745   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
   1746 
   1747   return ARMEmitLibcall(I, LC);
   1748 }
   1749 
   1750 bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
   1751   EVT DestVT  = TLI.getValueType(I->getType(), true);
   1752 
   1753   // We can get here in the case when we have a binary operation on a non-legal
   1754   // type and the target independent selector doesn't know how to handle it.
   1755   if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
   1756     return false;
   1757 
   1758   unsigned Opc;
   1759   switch (ISDOpcode) {
   1760     default: return false;
   1761     case ISD::ADD:
   1762       Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
   1763       break;
   1764     case ISD::OR:
   1765       Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
   1766       break;
   1767     case ISD::SUB:
   1768       Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
   1769       break;
   1770   }
   1771 
   1772   unsigned SrcReg1 = getRegForValue(I->getOperand(0));
   1773   if (SrcReg1 == 0) return false;
   1774 
   1775   // TODO: Often the 2nd operand is an immediate, which can be encoded directly
   1776   // in the instruction, rather then materializing the value in a register.
   1777   unsigned SrcReg2 = getRegForValue(I->getOperand(1));
   1778   if (SrcReg2 == 0) return false;
   1779 
   1780   unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
   1781   SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
   1782   SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
   1783   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1784                           TII.get(Opc), ResultReg)
   1785                   .addReg(SrcReg1).addReg(SrcReg2));
   1786   UpdateValueMap(I, ResultReg);
   1787   return true;
   1788 }
   1789 
   1790 bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
   1791   EVT FPVT = TLI.getValueType(I->getType(), true);
   1792   if (!FPVT.isSimple()) return false;
   1793   MVT VT = FPVT.getSimpleVT();
   1794 
   1795   // We can get here in the case when we want to use NEON for our fp
   1796   // operations, but can't figure out how to. Just use the vfp instructions
   1797   // if we have them.
   1798   // FIXME: It'd be nice to use NEON instructions.
   1799   Type *Ty = I->getType();
   1800   bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
   1801   if (isFloat && !Subtarget->hasVFP2())
   1802     return false;
   1803 
   1804   unsigned Opc;
   1805   bool is64bit = VT == MVT::f64 || VT == MVT::i64;
   1806   switch (ISDOpcode) {
   1807     default: return false;
   1808     case ISD::FADD:
   1809       Opc = is64bit ? ARM::VADDD : ARM::VADDS;
   1810       break;
   1811     case ISD::FSUB:
   1812       Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
   1813       break;
   1814     case ISD::FMUL:
   1815       Opc = is64bit ? ARM::VMULD : ARM::VMULS;
   1816       break;
   1817   }
   1818   unsigned Op1 = getRegForValue(I->getOperand(0));
   1819   if (Op1 == 0) return false;
   1820 
   1821   unsigned Op2 = getRegForValue(I->getOperand(1));
   1822   if (Op2 == 0) return false;
   1823 
   1824   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
   1825   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1826                           TII.get(Opc), ResultReg)
   1827                   .addReg(Op1).addReg(Op2));
   1828   UpdateValueMap(I, ResultReg);
   1829   return true;
   1830 }
   1831 
   1832 // Call Handling Code
   1833 
   1834 // This is largely taken directly from CCAssignFnForNode
   1835 // TODO: We may not support all of this.
   1836 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   1837                                            bool Return,
   1838                                            bool isVarArg) {
   1839   switch (CC) {
   1840   default:
   1841     llvm_unreachable("Unsupported calling convention");
   1842   case CallingConv::Fast:
   1843     if (Subtarget->hasVFP2() && !isVarArg) {
   1844       if (!Subtarget->isAAPCS_ABI())
   1845         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
   1846       // For AAPCS ABI targets, just use VFP variant of the calling convention.
   1847       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1848     }
   1849     // Fallthrough
   1850   case CallingConv::C:
   1851     // Use target triple & subtarget features to do actual dispatch.
   1852     if (Subtarget->isAAPCS_ABI()) {
   1853       if (Subtarget->hasVFP2() &&
   1854           TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
   1855         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
   1856       else
   1857         return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
   1858     } else
   1859         return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   1860   case CallingConv::ARM_AAPCS_VFP:
   1861     if (!isVarArg)
   1862       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
   1863     // Fall through to soft float variant, variadic functions don't
   1864     // use hard floating point ABI.
   1865   case CallingConv::ARM_AAPCS:
   1866     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
   1867   case CallingConv::ARM_APCS:
   1868     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   1869   case CallingConv::GHC:
   1870     if (Return)
   1871       llvm_unreachable("Can't return in GHC call convention");
   1872     else
   1873       return CC_ARM_APCS_GHC;
   1874   }
   1875 }
   1876 
   1877 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
   1878                                   SmallVectorImpl<unsigned> &ArgRegs,
   1879                                   SmallVectorImpl<MVT> &ArgVTs,
   1880                                   SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
   1881                                   SmallVectorImpl<unsigned> &RegArgs,
   1882                                   CallingConv::ID CC,
   1883                                   unsigned &NumBytes,
   1884                                   bool isVarArg) {
   1885   SmallVector<CCValAssign, 16> ArgLocs;
   1886   CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs, *Context);
   1887   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
   1888                              CCAssignFnForCall(CC, false, isVarArg));
   1889 
   1890   // Check that we can handle all of the arguments. If we can't, then bail out
   1891   // now before we add code to the MBB.
   1892   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1893     CCValAssign &VA = ArgLocs[i];
   1894     MVT ArgVT = ArgVTs[VA.getValNo()];
   1895 
   1896     // We don't handle NEON/vector parameters yet.
   1897     if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
   1898       return false;
   1899 
   1900     // Now copy/store arg to correct locations.
   1901     if (VA.isRegLoc() && !VA.needsCustom()) {
   1902       continue;
   1903     } else if (VA.needsCustom()) {
   1904       // TODO: We need custom lowering for vector (v2f64) args.
   1905       if (VA.getLocVT() != MVT::f64 ||
   1906           // TODO: Only handle register args for now.
   1907           !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
   1908         return false;
   1909     } else {
   1910       switch (ArgVT.SimpleTy) {
   1911       default:
   1912         return false;
   1913       case MVT::i1:
   1914       case MVT::i8:
   1915       case MVT::i16:
   1916       case MVT::i32:
   1917         break;
   1918       case MVT::f32:
   1919         if (!Subtarget->hasVFP2())
   1920           return false;
   1921         break;
   1922       case MVT::f64:
   1923         if (!Subtarget->hasVFP2())
   1924           return false;
   1925         break;
   1926       }
   1927     }
   1928   }
   1929 
   1930   // At the point, we are able to handle the call's arguments in fast isel.
   1931 
   1932   // Get a count of how many bytes are to be pushed on the stack.
   1933   NumBytes = CCInfo.getNextStackOffset();
   1934 
   1935   // Issue CALLSEQ_START
   1936   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   1937   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1938                           TII.get(AdjStackDown))
   1939                   .addImm(NumBytes));
   1940 
   1941   // Process the args.
   1942   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1943     CCValAssign &VA = ArgLocs[i];
   1944     unsigned Arg = ArgRegs[VA.getValNo()];
   1945     MVT ArgVT = ArgVTs[VA.getValNo()];
   1946 
   1947     assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
   1948            "We don't handle NEON/vector parameters yet.");
   1949 
   1950     // Handle arg promotion, etc.
   1951     switch (VA.getLocInfo()) {
   1952       case CCValAssign::Full: break;
   1953       case CCValAssign::SExt: {
   1954         MVT DestVT = VA.getLocVT();
   1955         Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
   1956         assert (Arg != 0 && "Failed to emit a sext");
   1957         ArgVT = DestVT;
   1958         break;
   1959       }
   1960       case CCValAssign::AExt:
   1961         // Intentional fall-through.  Handle AExt and ZExt.
   1962       case CCValAssign::ZExt: {
   1963         MVT DestVT = VA.getLocVT();
   1964         Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
   1965         assert (Arg != 0 && "Failed to emit a zext");
   1966         ArgVT = DestVT;
   1967         break;
   1968       }
   1969       case CCValAssign::BCvt: {
   1970         unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
   1971                                  /*TODO: Kill=*/false);
   1972         assert(BC != 0 && "Failed to emit a bitcast!");
   1973         Arg = BC;
   1974         ArgVT = VA.getLocVT();
   1975         break;
   1976       }
   1977       default: llvm_unreachable("Unknown arg promotion!");
   1978     }
   1979 
   1980     // Now copy/store arg to correct locations.
   1981     if (VA.isRegLoc() && !VA.needsCustom()) {
   1982       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1983               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
   1984       RegArgs.push_back(VA.getLocReg());
   1985     } else if (VA.needsCustom()) {
   1986       // TODO: We need custom lowering for vector (v2f64) args.
   1987       assert(VA.getLocVT() == MVT::f64 &&
   1988              "Custom lowering for v2f64 args not available");
   1989 
   1990       CCValAssign &NextVA = ArgLocs[++i];
   1991 
   1992       assert(VA.isRegLoc() && NextVA.isRegLoc() &&
   1993              "We only handle register args!");
   1994 
   1995       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1996                               TII.get(ARM::VMOVRRD), VA.getLocReg())
   1997                       .addReg(NextVA.getLocReg(), RegState::Define)
   1998                       .addReg(Arg));
   1999       RegArgs.push_back(VA.getLocReg());
   2000       RegArgs.push_back(NextVA.getLocReg());
   2001     } else {
   2002       assert(VA.isMemLoc());
   2003       // Need to store on the stack.
   2004       Address Addr;
   2005       Addr.BaseType = Address::RegBase;
   2006       Addr.Base.Reg = ARM::SP;
   2007       Addr.Offset = VA.getLocMemOffset();
   2008 
   2009       bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
   2010       assert(EmitRet && "Could not emit a store for argument!");
   2011     }
   2012   }
   2013 
   2014   return true;
   2015 }
   2016 
   2017 bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
   2018                              const Instruction *I, CallingConv::ID CC,
   2019                              unsigned &NumBytes, bool isVarArg) {
   2020   // Issue CALLSEQ_END
   2021   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   2022   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2023                           TII.get(AdjStackUp))
   2024                   .addImm(NumBytes).addImm(0));
   2025 
   2026   // Now the return value.
   2027   if (RetVT != MVT::isVoid) {
   2028     SmallVector<CCValAssign, 16> RVLocs;
   2029     CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
   2030     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
   2031 
   2032     // Copy all of the result registers out of their specified physreg.
   2033     if (RVLocs.size() == 2 && RetVT == MVT::f64) {
   2034       // For this move we copy into two registers and then move into the
   2035       // double fp reg we want.
   2036       MVT DestVT = RVLocs[0].getValVT();
   2037       const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
   2038       unsigned ResultReg = createResultReg(DstRC);
   2039       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2040                               TII.get(ARM::VMOVDRR), ResultReg)
   2041                       .addReg(RVLocs[0].getLocReg())
   2042                       .addReg(RVLocs[1].getLocReg()));
   2043 
   2044       UsedRegs.push_back(RVLocs[0].getLocReg());
   2045       UsedRegs.push_back(RVLocs[1].getLocReg());
   2046 
   2047       // Finally update the result.
   2048       UpdateValueMap(I, ResultReg);
   2049     } else {
   2050       assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
   2051       MVT CopyVT = RVLocs[0].getValVT();
   2052 
   2053       // Special handling for extended integers.
   2054       if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
   2055         CopyVT = MVT::i32;
   2056 
   2057       const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
   2058 
   2059       unsigned ResultReg = createResultReg(DstRC);
   2060       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2061               TII.get(TargetOpcode::COPY),
   2062               ResultReg).addReg(RVLocs[0].getLocReg());
   2063       UsedRegs.push_back(RVLocs[0].getLocReg());
   2064 
   2065       // Finally update the result.
   2066       UpdateValueMap(I, ResultReg);
   2067     }
   2068   }
   2069 
   2070   return true;
   2071 }
   2072 
   2073 bool ARMFastISel::SelectRet(const Instruction *I) {
   2074   const ReturnInst *Ret = cast<ReturnInst>(I);
   2075   const Function &F = *I->getParent()->getParent();
   2076 
   2077   if (!FuncInfo.CanLowerReturn)
   2078     return false;
   2079 
   2080   // Build a list of return value registers.
   2081   SmallVector<unsigned, 4> RetRegs;
   2082 
   2083   CallingConv::ID CC = F.getCallingConv();
   2084   if (Ret->getNumOperands() > 0) {
   2085     SmallVector<ISD::OutputArg, 4> Outs;
   2086     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
   2087 
   2088     // Analyze operands of the call, assigning locations to each operand.
   2089     SmallVector<CCValAssign, 16> ValLocs;
   2090     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext());
   2091     CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
   2092                                                  F.isVarArg()));
   2093 
   2094     const Value *RV = Ret->getOperand(0);
   2095     unsigned Reg = getRegForValue(RV);
   2096     if (Reg == 0)
   2097       return false;
   2098 
   2099     // Only handle a single return value for now.
   2100     if (ValLocs.size() != 1)
   2101       return false;
   2102 
   2103     CCValAssign &VA = ValLocs[0];
   2104 
   2105     // Don't bother handling odd stuff for now.
   2106     if (VA.getLocInfo() != CCValAssign::Full)
   2107       return false;
   2108     // Only handle register returns for now.
   2109     if (!VA.isRegLoc())
   2110       return false;
   2111 
   2112     unsigned SrcReg = Reg + VA.getValNo();
   2113     EVT RVEVT = TLI.getValueType(RV->getType());
   2114     if (!RVEVT.isSimple()) return false;
   2115     MVT RVVT = RVEVT.getSimpleVT();
   2116     MVT DestVT = VA.getValVT();
   2117     // Special handling for extended integers.
   2118     if (RVVT != DestVT) {
   2119       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
   2120         return false;
   2121 
   2122       assert(DestVT == MVT::i32 && "ARM should always ext to i32");
   2123 
   2124       // Perform extension if flagged as either zext or sext.  Otherwise, do
   2125       // nothing.
   2126       if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
   2127         SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
   2128         if (SrcReg == 0) return false;
   2129       }
   2130     }
   2131 
   2132     // Make the copy.
   2133     unsigned DstReg = VA.getLocReg();
   2134     const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
   2135     // Avoid a cross-class copy. This is very unlikely.
   2136     if (!SrcRC->contains(DstReg))
   2137       return false;
   2138     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2139             TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
   2140 
   2141     // Add register to return instruction.
   2142     RetRegs.push_back(VA.getLocReg());
   2143   }
   2144 
   2145   unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
   2146   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2147                                     TII.get(RetOpc));
   2148   AddOptionalDefs(MIB);
   2149   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
   2150     MIB.addReg(RetRegs[i], RegState::Implicit);
   2151   return true;
   2152 }
   2153 
   2154 unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
   2155   if (UseReg)
   2156     return isThumb2 ? ARM::tBLXr : ARM::BLX;
   2157   else
   2158     return isThumb2 ? ARM::tBL : ARM::BL;
   2159 }
   2160 
   2161 unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
   2162   // Manually compute the global's type to avoid building it when unnecessary.
   2163   Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
   2164   EVT LCREVT = TLI.getValueType(GVTy);
   2165   if (!LCREVT.isSimple()) return 0;
   2166 
   2167   GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
   2168                                        GlobalValue::ExternalLinkage, nullptr,
   2169                                        Name);
   2170   assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
   2171   return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
   2172 }
   2173 
   2174 // A quick function that will emit a call for a named libcall in F with the
   2175 // vector of passed arguments for the Instruction in I. We can assume that we
   2176 // can emit a call for any libcall we can produce. This is an abridged version
   2177 // of the full call infrastructure since we won't need to worry about things
   2178 // like computed function pointers or strange arguments at call sites.
   2179 // TODO: Try to unify this and the normal call bits for ARM, then try to unify
   2180 // with X86.
   2181 bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   2182   CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
   2183 
   2184   // Handle *simple* calls for now.
   2185   Type *RetTy = I->getType();
   2186   MVT RetVT;
   2187   if (RetTy->isVoidTy())
   2188     RetVT = MVT::isVoid;
   2189   else if (!isTypeLegal(RetTy, RetVT))
   2190     return false;
   2191 
   2192   // Can't handle non-double multi-reg retvals.
   2193   if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
   2194     SmallVector<CCValAssign, 16> RVLocs;
   2195     CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
   2196     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
   2197     if (RVLocs.size() >= 2 && RetVT != MVT::f64)
   2198       return false;
   2199   }
   2200 
   2201   // Set up the argument vectors.
   2202   SmallVector<Value*, 8> Args;
   2203   SmallVector<unsigned, 8> ArgRegs;
   2204   SmallVector<MVT, 8> ArgVTs;
   2205   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   2206   Args.reserve(I->getNumOperands());
   2207   ArgRegs.reserve(I->getNumOperands());
   2208   ArgVTs.reserve(I->getNumOperands());
   2209   ArgFlags.reserve(I->getNumOperands());
   2210   for (unsigned i = 0; i < I->getNumOperands(); ++i) {
   2211     Value *Op = I->getOperand(i);
   2212     unsigned Arg = getRegForValue(Op);
   2213     if (Arg == 0) return false;
   2214 
   2215     Type *ArgTy = Op->getType();
   2216     MVT ArgVT;
   2217     if (!isTypeLegal(ArgTy, ArgVT)) return false;
   2218 
   2219     ISD::ArgFlagsTy Flags;
   2220     unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
   2221     Flags.setOrigAlign(OriginalAlignment);
   2222 
   2223     Args.push_back(Op);
   2224     ArgRegs.push_back(Arg);
   2225     ArgVTs.push_back(ArgVT);
   2226     ArgFlags.push_back(Flags);
   2227   }
   2228 
   2229   // Handle the arguments now that we've gotten them.
   2230   SmallVector<unsigned, 4> RegArgs;
   2231   unsigned NumBytes;
   2232   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
   2233                        RegArgs, CC, NumBytes, false))
   2234     return false;
   2235 
   2236   unsigned CalleeReg = 0;
   2237   if (EnableARMLongCalls) {
   2238     CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
   2239     if (CalleeReg == 0) return false;
   2240   }
   2241 
   2242   // Issue the call.
   2243   unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
   2244   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2245                                     DbgLoc, TII.get(CallOpc));
   2246   // BL / BLX don't take a predicate, but tBL / tBLX do.
   2247   if (isThumb2)
   2248     AddDefaultPred(MIB);
   2249   if (EnableARMLongCalls)
   2250     MIB.addReg(CalleeReg);
   2251   else
   2252     MIB.addExternalSymbol(TLI.getLibcallName(Call));
   2253 
   2254   // Add implicit physical register uses to the call.
   2255   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
   2256     MIB.addReg(RegArgs[i], RegState::Implicit);
   2257 
   2258   // Add a register mask with the call-preserved registers.
   2259   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   2260   MIB.addRegMask(TRI.getCallPreservedMask(CC));
   2261 
   2262   // Finish off the call including any return values.
   2263   SmallVector<unsigned, 4> UsedRegs;
   2264   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
   2265 
   2266   // Set all unused physreg defs as dead.
   2267   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
   2268 
   2269   return true;
   2270 }
   2271 
   2272 bool ARMFastISel::SelectCall(const Instruction *I,
   2273                              const char *IntrMemName = nullptr) {
   2274   const CallInst *CI = cast<CallInst>(I);
   2275   const Value *Callee = CI->getCalledValue();
   2276 
   2277   // Can't handle inline asm.
   2278   if (isa<InlineAsm>(Callee)) return false;
   2279 
   2280   // Allow SelectionDAG isel to handle tail calls.
   2281   if (CI->isTailCall()) return false;
   2282 
   2283   // Check the calling convention.
   2284   ImmutableCallSite CS(CI);
   2285   CallingConv::ID CC = CS.getCallingConv();
   2286 
   2287   // TODO: Avoid some calling conventions?
   2288 
   2289   PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   2290   FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   2291   bool isVarArg = FTy->isVarArg();
   2292 
   2293   // Handle *simple* calls for now.
   2294   Type *RetTy = I->getType();
   2295   MVT RetVT;
   2296   if (RetTy->isVoidTy())
   2297     RetVT = MVT::isVoid;
   2298   else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
   2299            RetVT != MVT::i8  && RetVT != MVT::i1)
   2300     return false;
   2301 
   2302   // Can't handle non-double multi-reg retvals.
   2303   if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
   2304       RetVT != MVT::i16 && RetVT != MVT::i32) {
   2305     SmallVector<CCValAssign, 16> RVLocs;
   2306     CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, RVLocs, *Context);
   2307     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
   2308     if (RVLocs.size() >= 2 && RetVT != MVT::f64)
   2309       return false;
   2310   }
   2311 
   2312   // Set up the argument vectors.
   2313   SmallVector<Value*, 8> Args;
   2314   SmallVector<unsigned, 8> ArgRegs;
   2315   SmallVector<MVT, 8> ArgVTs;
   2316   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   2317   unsigned arg_size = CS.arg_size();
   2318   Args.reserve(arg_size);
   2319   ArgRegs.reserve(arg_size);
   2320   ArgVTs.reserve(arg_size);
   2321   ArgFlags.reserve(arg_size);
   2322   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
   2323        i != e; ++i) {
   2324     // If we're lowering a memory intrinsic instead of a regular call, skip the
   2325     // last two arguments, which shouldn't be passed to the underlying function.
   2326     if (IntrMemName && e-i <= 2)
   2327       break;
   2328 
   2329     ISD::ArgFlagsTy Flags;
   2330     unsigned AttrInd = i - CS.arg_begin() + 1;
   2331     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
   2332       Flags.setSExt();
   2333     if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
   2334       Flags.setZExt();
   2335 
   2336     // FIXME: Only handle *easy* calls for now.
   2337     if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
   2338         CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
   2339         CS.paramHasAttr(AttrInd, Attribute::Nest) ||
   2340         CS.paramHasAttr(AttrInd, Attribute::ByVal))
   2341       return false;
   2342 
   2343     Type *ArgTy = (*i)->getType();
   2344     MVT ArgVT;
   2345     if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
   2346         ArgVT != MVT::i1)
   2347       return false;
   2348 
   2349     unsigned Arg = getRegForValue(*i);
   2350     if (Arg == 0)
   2351       return false;
   2352 
   2353     unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
   2354     Flags.setOrigAlign(OriginalAlignment);
   2355 
   2356     Args.push_back(*i);
   2357     ArgRegs.push_back(Arg);
   2358     ArgVTs.push_back(ArgVT);
   2359     ArgFlags.push_back(Flags);
   2360   }
   2361 
   2362   // Handle the arguments now that we've gotten them.
   2363   SmallVector<unsigned, 4> RegArgs;
   2364   unsigned NumBytes;
   2365   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
   2366                        RegArgs, CC, NumBytes, isVarArg))
   2367     return false;
   2368 
   2369   bool UseReg = false;
   2370   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
   2371   if (!GV || EnableARMLongCalls) UseReg = true;
   2372 
   2373   unsigned CalleeReg = 0;
   2374   if (UseReg) {
   2375     if (IntrMemName)
   2376       CalleeReg = getLibcallReg(IntrMemName);
   2377     else
   2378       CalleeReg = getRegForValue(Callee);
   2379 
   2380     if (CalleeReg == 0) return false;
   2381   }
   2382 
   2383   // Issue the call.
   2384   unsigned CallOpc = ARMSelectCallOp(UseReg);
   2385   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2386                                     DbgLoc, TII.get(CallOpc));
   2387 
   2388   unsigned char OpFlags = 0;
   2389 
   2390   // Add MO_PLT for global address or external symbol in the PIC relocation
   2391   // model.
   2392   if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
   2393     OpFlags = ARMII::MO_PLT;
   2394 
   2395   // ARM calls don't take a predicate, but tBL / tBLX do.
   2396   if(isThumb2)
   2397     AddDefaultPred(MIB);
   2398   if (UseReg)
   2399     MIB.addReg(CalleeReg);
   2400   else if (!IntrMemName)
   2401     MIB.addGlobalAddress(GV, 0, OpFlags);
   2402   else
   2403     MIB.addExternalSymbol(IntrMemName, OpFlags);
   2404 
   2405   // Add implicit physical register uses to the call.
   2406   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
   2407     MIB.addReg(RegArgs[i], RegState::Implicit);
   2408 
   2409   // Add a register mask with the call-preserved registers.
   2410   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   2411   MIB.addRegMask(TRI.getCallPreservedMask(CC));
   2412 
   2413   // Finish off the call including any return values.
   2414   SmallVector<unsigned, 4> UsedRegs;
   2415   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
   2416     return false;
   2417 
   2418   // Set all unused physreg defs as dead.
   2419   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
   2420 
   2421   return true;
   2422 }
   2423 
   2424 bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
   2425   return Len <= 16;
   2426 }
   2427 
   2428 bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
   2429                                         uint64_t Len, unsigned Alignment) {
   2430   // Make sure we don't bloat code by inlining very large memcpy's.
   2431   if (!ARMIsMemCpySmall(Len))
   2432     return false;
   2433 
   2434   while (Len) {
   2435     MVT VT;
   2436     if (!Alignment || Alignment >= 4) {
   2437       if (Len >= 4)
   2438         VT = MVT::i32;
   2439       else if (Len >= 2)
   2440         VT = MVT::i16;
   2441       else {
   2442         assert (Len == 1 && "Expected a length of 1!");
   2443         VT = MVT::i8;
   2444       }
   2445     } else {
   2446       // Bound based on alignment.
   2447       if (Len >= 2 && Alignment == 2)
   2448         VT = MVT::i16;
   2449       else {
   2450         VT = MVT::i8;
   2451       }
   2452     }
   2453 
   2454     bool RV;
   2455     unsigned ResultReg;
   2456     RV = ARMEmitLoad(VT, ResultReg, Src);
   2457     assert (RV == true && "Should be able to handle this load.");
   2458     RV = ARMEmitStore(VT, ResultReg, Dest);
   2459     assert (RV == true && "Should be able to handle this store.");
   2460     (void)RV;
   2461 
   2462     unsigned Size = VT.getSizeInBits()/8;
   2463     Len -= Size;
   2464     Dest.Offset += Size;
   2465     Src.Offset += Size;
   2466   }
   2467 
   2468   return true;
   2469 }
   2470 
   2471 bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
   2472   // FIXME: Handle more intrinsics.
   2473   switch (I.getIntrinsicID()) {
   2474   default: return false;
   2475   case Intrinsic::frameaddress: {
   2476     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
   2477     MFI->setFrameAddressIsTaken(true);
   2478 
   2479     unsigned LdrOpc;
   2480     const TargetRegisterClass *RC;
   2481     if (isThumb2) {
   2482       LdrOpc =  ARM::t2LDRi12;
   2483       RC = (const TargetRegisterClass*)&ARM::tGPRRegClass;
   2484     } else {
   2485       LdrOpc =  ARM::LDRi12;
   2486       RC = (const TargetRegisterClass*)&ARM::GPRRegClass;
   2487     }
   2488 
   2489     const ARMBaseRegisterInfo *RegInfo =
   2490           static_cast<const ARMBaseRegisterInfo*>(TM.getRegisterInfo());
   2491     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
   2492     unsigned SrcReg = FramePtr;
   2493 
   2494     // Recursively load frame address
   2495     // ldr r0 [fp]
   2496     // ldr r0 [r0]
   2497     // ldr r0 [r0]
   2498     // ...
   2499     unsigned DestReg;
   2500     unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
   2501     while (Depth--) {
   2502       DestReg = createResultReg(RC);
   2503       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2504                               TII.get(LdrOpc), DestReg)
   2505                       .addReg(SrcReg).addImm(0));
   2506       SrcReg = DestReg;
   2507     }
   2508     UpdateValueMap(&I, SrcReg);
   2509     return true;
   2510   }
   2511   case Intrinsic::memcpy:
   2512   case Intrinsic::memmove: {
   2513     const MemTransferInst &MTI = cast<MemTransferInst>(I);
   2514     // Don't handle volatile.
   2515     if (MTI.isVolatile())
   2516       return false;
   2517 
   2518     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
   2519     // we would emit dead code because we don't currently handle memmoves.
   2520     bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
   2521     if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
   2522       // Small memcpy's are common enough that we want to do them without a call
   2523       // if possible.
   2524       uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
   2525       if (ARMIsMemCpySmall(Len)) {
   2526         Address Dest, Src;
   2527         if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
   2528             !ARMComputeAddress(MTI.getRawSource(), Src))
   2529           return false;
   2530         unsigned Alignment = MTI.getAlignment();
   2531         if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
   2532           return true;
   2533       }
   2534     }
   2535 
   2536     if (!MTI.getLength()->getType()->isIntegerTy(32))
   2537       return false;
   2538 
   2539     if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
   2540       return false;
   2541 
   2542     const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
   2543     return SelectCall(&I, IntrMemName);
   2544   }
   2545   case Intrinsic::memset: {
   2546     const MemSetInst &MSI = cast<MemSetInst>(I);
   2547     // Don't handle volatile.
   2548     if (MSI.isVolatile())
   2549       return false;
   2550 
   2551     if (!MSI.getLength()->getType()->isIntegerTy(32))
   2552       return false;
   2553 
   2554     if (MSI.getDestAddressSpace() > 255)
   2555       return false;
   2556 
   2557     return SelectCall(&I, "memset");
   2558   }
   2559   case Intrinsic::trap: {
   2560     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
   2561       Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
   2562     return true;
   2563   }
   2564   }
   2565 }
   2566 
   2567 bool ARMFastISel::SelectTrunc(const Instruction *I) {
   2568   // The high bits for a type smaller than the register size are assumed to be
   2569   // undefined.
   2570   Value *Op = I->getOperand(0);
   2571 
   2572   EVT SrcVT, DestVT;
   2573   SrcVT = TLI.getValueType(Op->getType(), true);
   2574   DestVT = TLI.getValueType(I->getType(), true);
   2575 
   2576   if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
   2577     return false;
   2578   if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
   2579     return false;
   2580 
   2581   unsigned SrcReg = getRegForValue(Op);
   2582   if (!SrcReg) return false;
   2583 
   2584   // Because the high bits are undefined, a truncate doesn't generate
   2585   // any code.
   2586   UpdateValueMap(I, SrcReg);
   2587   return true;
   2588 }
   2589 
   /// Emit the instruction(s) that extend \p SrcReg from \p SrcVT (i1, i8 or
   /// i16) to \p DestVT (i8, i16 or i32). \p isZExt selects zero extension;
   /// otherwise the value is sign extended.
   ///
   /// Depending on the source width, the instruction set and hasV6Ops, the
   /// extension is emitted either as one instruction or as a left shift
   /// followed by a right shift; the static tables below encode that choice.
   ///
   /// \returns the virtual register holding the extended value, or 0 if the
   /// type combination is not handled.
   2590 unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
   2591                                     bool isZExt) {
   2592   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
   2593     return 0;
   2594   if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
   2595     return 0;
   2596 
   2597   // Table of which combinations can be emitted as a single instruction,
   2598   // and which will require two.
           // Indexed as [Bitness][isThumb2][hasV6Ops][isZExt].
   2599   static const uint8_t isSingleInstrTbl[3][2][2][2] = {
   2600     //            ARM                     Thumb
   2601     //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
   2602     //    ext:     s  z      s  z          s  z      s  z
   2603     /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
   2604     /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
   2605     /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
   2606   };
   2607 
   2608   // Target registers for:
   2609   //  - For ARM can never be PC.
   2610   //  - For 16-bit Thumb are restricted to lower 8 registers.
   2611   //  - For 32-bit Thumb are restricted to non-SP and non-PC.
           // Indexed as [isThumb2][isSingleInstr].
   2612   static const TargetRegisterClass *RCTbl[2][2] = {
   2613     // Instructions: Two                     Single
   2614     /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
   2615     /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
   2616   };
   2617 
   2618   // Table governing the instruction(s) to be emitted.
           // Indexed as [isSingleInstr][isThumb2][Bitness][isZExt]. Entries whose
           // opcode is ARM::KILL are placeholders that must never be selected; the
           // "Invalid table entry" assert below checks for that.
   2619   static const struct InstructionTable {
   2620     uint32_t Opc   : 16;
   2621     uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
   2622     uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
   2623     uint32_t Imm   :  8; // All instructions have either a shift or a mask.
   2624   } IT[2][2][3][2] = {
   2625     { // Two instructions (first is left shift, second is in this table).
   2626       { // ARM                Opc           S  Shift             Imm
   2627         /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  31 },
   2628         /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  31 } },
   2629         /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  24 },
   2630         /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  24 } },
   2631         /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  16 },
   2632         /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  16 } }
   2633       },
   2634       { // Thumb              Opc           S  Shift             Imm
   2635         /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  31 },
   2636         /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  31 } },
   2637         /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  24 },
   2638         /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  24 } },
   2639         /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  16 },
   2640         /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  16 } }
   2641       }
   2642     },
   2643     { // Single instruction.
   2644       { // ARM                Opc           S  Shift             Imm
   2645         /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
   2646         /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,   1 } },
   2647         /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,   0 },
   2648         /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
   2649         /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,   0 },
   2650         /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,   0 } }
   2651       },
   2652       { // Thumb              Opc           S  Shift             Imm
   2653         /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
   2654         /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,   1 } },
   2655         /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,   0 },
   2656         /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
   2657         /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,   0 },
   2658         /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,   0 } }
   2659       }
   2660     }
   2661   };
   2662 
   2663   unsigned SrcBits = SrcVT.getSizeInBits();
   2664   unsigned DestBits = DestVT.getSizeInBits();
   2665   (void) DestBits;
   2666   assert((SrcBits < DestBits) && "can only extend to larger types");
   2667   assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
   2668          "other sizes unimplemented");
   2669   assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
   2670          "other sizes unimplemented");
   2671 
   2672   bool hasV6Ops = Subtarget->hasV6Ops();
   2673   unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
   2674   assert((Bitness < 3) && "sanity-check table bounds");
   2675 
           // Look up the emission strategy, register class and instruction
           // description for this combination.
   2676   bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
   2677   const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
   2678   const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
   2679   unsigned Opc = ITP->Opc;
   2680   assert(ARM::KILL != Opc && "Invalid table entry");
   2681   unsigned hasS = ITP->hasS;
   2682   ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
   2683   assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
   2684          "only MOVsi has shift operand addressing mode");
   2685   unsigned Imm = ITP->Imm;
   2686 
   2687   // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
   2688   bool setsCPSR = &ARM::tGPRRegClass == RC;
   2689   unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
   2690   unsigned ResultReg;
   2691   // MOVsi encodes shift and immediate in shift operand addressing mode.
   2692   // The following condition has the same value when emitting two
   2693   // instruction sequences: both are shifts.
   2694   bool ImmIsSO = (Shift != ARM_AM::no_shift);
   2695 
   2696   // Either one or two instructions are emitted.
   2697   // They're always of the form:
   2698   //   dst = in OP imm
   2699   // CPSR is set only by 16-bit Thumb instructions.
   2700   // Predicate, if any, is AL.
   2701   // S bit, if available, is always 0.
   2702   // When two are emitted the first's result will feed as the second's input,
   2703   // that value is then dead.
   2704   unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
   2705   for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
   2706     ResultReg = createResultReg(RC);
             // In a two-instruction sequence the first instruction is the left
             // shift; the table entry describes the second one.
   2707     bool isLsl = (0 == Instr) && !isSingleInstr;
   2708     unsigned Opcode = isLsl ? LSLOpc : Opc;
   2709     ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
   2710     unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
             // Only the second instruction's source (the first one's result) is
             // marked killed.
   2711     bool isKill = 1 == Instr;
   2712     MachineInstrBuilder MIB = BuildMI(
   2713         *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
   2714     if (setsCPSR)
   2715       MIB.addReg(ARM::CPSR, RegState::Define);
             // The source operand index is one higher when the CPSR def operand
             // was added above.
   2716     SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
   2717     AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
   2718     if (hasS)
   2719       AddDefaultCC(MIB);
   2720     // Second instruction consumes the first's result.
   2721     SrcReg = ResultReg;
   2722   }
   2723 
   2724   return ResultReg;
   2725 }
   2726 
   2727 bool ARMFastISel::SelectIntExt(const Instruction *I) {
   2728   // On ARM, in general, integer casts don't involve legal types; this code
   2729   // handles promotable integers.
   2730   Type *DestTy = I->getType();
   2731   Value *Src = I->getOperand(0);
   2732   Type *SrcTy = Src->getType();
   2733 
   2734   bool isZExt = isa<ZExtInst>(I);
   2735   unsigned SrcReg = getRegForValue(Src);
   2736   if (!SrcReg) return false;
   2737 
   2738   EVT SrcEVT, DestEVT;
   2739   SrcEVT = TLI.getValueType(SrcTy, true);
   2740   DestEVT = TLI.getValueType(DestTy, true);
   2741   if (!SrcEVT.isSimple()) return false;
   2742   if (!DestEVT.isSimple()) return false;
   2743 
   2744   MVT SrcVT = SrcEVT.getSimpleVT();
   2745   MVT DestVT = DestEVT.getSimpleVT();
   2746   unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
   2747   if (ResultReg == 0) return false;
   2748   UpdateValueMap(I, ResultReg);
   2749   return true;
   2750 }
   2751 
   2752 bool ARMFastISel::SelectShift(const Instruction *I,
   2753                               ARM_AM::ShiftOpc ShiftTy) {
   2754   // We handle thumb2 mode by target independent selector
   2755   // or SelectionDAG ISel.
   2756   if (isThumb2)
   2757     return false;
   2758 
   2759   // Only handle i32 now.
   2760   EVT DestVT = TLI.getValueType(I->getType(), true);
   2761   if (DestVT != MVT::i32)
   2762     return false;
   2763 
   2764   unsigned Opc = ARM::MOVsr;
   2765   unsigned ShiftImm;
   2766   Value *Src2Value = I->getOperand(1);
   2767   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
   2768     ShiftImm = CI->getZExtValue();
   2769 
   2770     // Fall back to selection DAG isel if the shift amount
   2771     // is zero or greater than the width of the value type.
   2772     if (ShiftImm == 0 || ShiftImm >=32)
   2773       return false;
   2774 
   2775     Opc = ARM::MOVsi;
   2776   }
   2777 
   2778   Value *Src1Value = I->getOperand(0);
   2779   unsigned Reg1 = getRegForValue(Src1Value);
   2780   if (Reg1 == 0) return false;
   2781 
   2782   unsigned Reg2 = 0;
   2783   if (Opc == ARM::MOVsr) {
   2784     Reg2 = getRegForValue(Src2Value);
   2785     if (Reg2 == 0) return false;
   2786   }
   2787 
   2788   unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
   2789   if(ResultReg == 0) return false;
   2790 
   2791   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2792                                     TII.get(Opc), ResultReg)
   2793                             .addReg(Reg1);
   2794 
   2795   if (Opc == ARM::MOVsi)
   2796     MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
   2797   else if (Opc == ARM::MOVsr) {
   2798     MIB.addReg(Reg2);
   2799     MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
   2800   }
   2801 
   2802   AddOptionalDefs(MIB);
   2803   UpdateValueMap(I, ResultReg);
   2804   return true;
   2805 }
   2806 
   2807 // TODO: SoftFP support.
   2808 bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
   2809 
   2810   switch (I->getOpcode()) {
   2811     case Instruction::Load:
   2812       return SelectLoad(I);
   2813     case Instruction::Store:
   2814       return SelectStore(I);
   2815     case Instruction::Br:
   2816       return SelectBranch(I);
   2817     case Instruction::IndirectBr:
   2818       return SelectIndirectBr(I);
   2819     case Instruction::ICmp:
   2820     case Instruction::FCmp:
   2821       return SelectCmp(I);
   2822     case Instruction::FPExt:
   2823       return SelectFPExt(I);
   2824     case Instruction::FPTrunc:
   2825       return SelectFPTrunc(I);
   2826     case Instruction::SIToFP:
   2827       return SelectIToFP(I, /*isSigned*/ true);
   2828     case Instruction::UIToFP:
   2829       return SelectIToFP(I, /*isSigned*/ false);
   2830     case Instruction::FPToSI:
   2831       return SelectFPToI(I, /*isSigned*/ true);
   2832     case Instruction::FPToUI:
   2833       return SelectFPToI(I, /*isSigned*/ false);
   2834     case Instruction::Add:
   2835       return SelectBinaryIntOp(I, ISD::ADD);
   2836     case Instruction::Or:
   2837       return SelectBinaryIntOp(I, ISD::OR);
   2838     case Instruction::Sub:
   2839       return SelectBinaryIntOp(I, ISD::SUB);
   2840     case Instruction::FAdd:
   2841       return SelectBinaryFPOp(I, ISD::FADD);
   2842     case Instruction::FSub:
   2843       return SelectBinaryFPOp(I, ISD::FSUB);
   2844     case Instruction::FMul:
   2845       return SelectBinaryFPOp(I, ISD::FMUL);
   2846     case Instruction::SDiv:
   2847       return SelectDiv(I, /*isSigned*/ true);
   2848     case Instruction::UDiv:
   2849       return SelectDiv(I, /*isSigned*/ false);
   2850     case Instruction::SRem:
   2851       return SelectRem(I, /*isSigned*/ true);
   2852     case Instruction::URem:
   2853       return SelectRem(I, /*isSigned*/ false);
   2854     case Instruction::Call:
   2855       if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
   2856         return SelectIntrinsicCall(*II);
   2857       return SelectCall(I);
   2858     case Instruction::Select:
   2859       return SelectSelect(I);
   2860     case Instruction::Ret:
   2861       return SelectRet(I);
   2862     case Instruction::Trunc:
   2863       return SelectTrunc(I);
   2864     case Instruction::ZExt:
   2865     case Instruction::SExt:
   2866       return SelectIntExt(I);
   2867     case Instruction::Shl:
   2868       return SelectShift(I, ARM_AM::lsl);
   2869     case Instruction::LShr:
   2870       return SelectShift(I, ARM_AM::lsr);
   2871     case Instruction::AShr:
   2872       return SelectShift(I, ARM_AM::asr);
   2873     default: break;
   2874   }
   2875   return false;
   2876 }
   2877 
   2878 namespace {
   2879 // This table describes sign- and zero-extend instructions which can be
   2880 // folded into a preceding load. All of these extends have an immediate
   2881 // (sometimes a mask and sometimes a shift) that's applied after
   2882 // extension.
   2883 const struct FoldableLoadExtendsStruct {
   2884   uint16_t Opc[2];  // Extend opcode, indexed by isThumb2: ARM, Thumb.
   2885   uint8_t ExpectedImm;     // Immediate the extend must carry to be foldable.
   2886   uint8_t isZExt     : 1;  // 1 for a zero-extend, 0 for a sign-extend.
   2887   uint8_t ExpectedVT : 7;  // MVT::SimpleValueType of the loaded value.
   2888 } FoldableLoadExtends[] = {
   2889   { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
   2890   { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
   2891   { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
   2892   { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
   2893   { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
   2894 };
   2895 } // end anonymous namespace
   2896 
   2897 /// \brief The specified machine instr operand is a vreg, and that
   2898 /// vreg is being provided by the specified load instruction.  If possible,
   2899 /// try to fold the load as an operand to the instruction, returning true if
   2900 /// successful.
   2901 bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   2902                                       const LoadInst *LI) {
   2903   // Verify we have a legal type before going any further.
   2904   MVT VT;
   2905   if (!isLoadTypeLegal(LI->getType(), VT))
   2906     return false;
   2907 
   2908   // Combine load followed by zero- or sign-extend.
   2909   // ldrb r1, [r0]       ldrb r1, [r0]
   2910   // uxtb r2, r1     =>
   2911   // mov  r3, r2         mov  r3, r1
   2912   if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
   2913     return false;
   2914   const uint64_t Imm = MI->getOperand(2).getImm();
   2915 
   2916   bool Found = false;
   2917   bool isZExt;
   2918   for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
   2919        i != e; ++i) {
   2920     if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
   2921         (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
   2922         MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
   2923       Found = true;
   2924       isZExt = FoldableLoadExtends[i].isZExt;
   2925     }
   2926   }
   2927   if (!Found) return false;
   2928 
   2929   // See if we can handle this address.
   2930   Address Addr;
   2931   if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
   2932 
   2933   unsigned ResultReg = MI->getOperand(0).getReg();
   2934   if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
   2935     return false;
   2936   MI->eraseFromParent();
   2937   return true;
   2938 }
   2939 
   2940 unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
   2941                                      unsigned Align, MVT VT) {
   2942   bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
   2943   ARMConstantPoolConstant *CPV =
   2944     ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
   2945   unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
   2946 
   2947   unsigned Opc;
   2948   unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
   2949   // Load value.
   2950   if (isThumb2) {
   2951     DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
   2952     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2953                             TII.get(ARM::t2LDRpci), DestReg1)
   2954                     .addConstantPoolIndex(Idx));
   2955     Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
   2956   } else {
   2957     // The extra immediate is for addrmode2.
   2958     DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
   2959     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2960                             DbgLoc, TII.get(ARM::LDRcp), DestReg1)
   2961                     .addConstantPoolIndex(Idx).addImm(0));
   2962     Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
   2963   }
   2964 
   2965   unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
   2966   if (GlobalBaseReg == 0) {
   2967     GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
   2968     AFI->setGlobalBaseReg(GlobalBaseReg);
   2969   }
   2970 
   2971   unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
   2972   DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
   2973   DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
   2974   GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
   2975   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2976                                     DbgLoc, TII.get(Opc), DestReg2)
   2977                             .addReg(DestReg1)
   2978                             .addReg(GlobalBaseReg);
   2979   if (!UseGOTOFF)
   2980     MIB.addImm(0);
   2981   AddOptionalDefs(MIB);
   2982 
   2983   return DestReg2;
   2984 }
   2985 
   2986 bool ARMFastISel::FastLowerArguments() {
   2987   if (!FuncInfo.CanLowerReturn)
   2988     return false;
   2989 
   2990   const Function *F = FuncInfo.Fn;
   2991   if (F->isVarArg())
   2992     return false;
   2993 
   2994   CallingConv::ID CC = F->getCallingConv();
   2995   switch (CC) {
   2996   default:
   2997     return false;
   2998   case CallingConv::Fast:
   2999   case CallingConv::C:
   3000   case CallingConv::ARM_AAPCS_VFP:
   3001   case CallingConv::ARM_AAPCS:
   3002   case CallingConv::ARM_APCS:
   3003     break;
   3004   }
   3005 
   3006   // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
   3007   // which are passed in r0 - r3.
   3008   unsigned Idx = 1;
   3009   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
   3010        I != E; ++I, ++Idx) {
   3011     if (Idx > 4)
   3012       return false;
   3013 
   3014     if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
   3015         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
   3016         F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
   3017       return false;
   3018 
   3019     Type *ArgTy = I->getType();
   3020     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
   3021       return false;
   3022 
   3023     EVT ArgVT = TLI.getValueType(ArgTy);
   3024     if (!ArgVT.isSimple()) return false;
   3025     switch (ArgVT.getSimpleVT().SimpleTy) {
   3026     case MVT::i8:
   3027     case MVT::i16:
   3028     case MVT::i32:
   3029       break;
   3030     default:
   3031       return false;
   3032     }
   3033   }
   3034 
   3035 
   3036   static const uint16_t GPRArgRegs[] = {
   3037     ARM::R0, ARM::R1, ARM::R2, ARM::R3
   3038   };
   3039 
   3040   const TargetRegisterClass *RC = &ARM::rGPRRegClass;
   3041   Idx = 0;
   3042   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
   3043        I != E; ++I, ++Idx) {
   3044     unsigned SrcReg = GPRArgRegs[Idx];
   3045     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
   3046     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
   3047     // Without this, EmitLiveInCopies may eliminate the livein if its only
   3048     // use is a bitcast (which isn't turned into an instruction).
   3049     unsigned ResultReg = createResultReg(RC);
   3050     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3051             TII.get(TargetOpcode::COPY),
   3052             ResultReg).addReg(DstReg, getKillRegState(true));
   3053     UpdateValueMap(I, ResultReg);
   3054   }
   3055 
   3056   return true;
   3057 }
   3058 
   3059 namespace llvm {
   3060   FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
   3061                                 const TargetLibraryInfo *libInfo) {
   3062     const TargetMachine &TM = funcInfo.MF->getTarget();
   3063 
   3064     const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
   3065     // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
   3066     bool UseFastISel = false;
   3067     UseFastISel |= Subtarget->isTargetMachO() && !Subtarget->isThumb1Only();
   3068     UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb();
   3069     UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb();
   3070 
   3071     if (UseFastISel) {
   3072       // iOS always has a FP for backtracking, force other targets
   3073       // to keep their FP when doing FastISel. The emitted code is
   3074       // currently superior, and in cases like test-suite's lencod
   3075       // FastISel isn't quite correct when FP is eliminated.
   3076       TM.Options.NoFramePointerElim = true;
   3077       return new ARMFastISel(funcInfo, libInfo);
   3078     }
   3079     return nullptr;
   3080   }
   3081 }
   3082