Home | History | Annotate | Download | only in ARM
      1 //===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the ARM-specific support for the FastISel class. Some
     11 // of the target-specific code is generated by tablegen in the file
     12 // ARMGenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "ARM.h"
     17 #include "ARMBaseRegisterInfo.h"
     18 #include "ARMCallingConv.h"
     19 #include "ARMConstantPoolValue.h"
     20 #include "ARMISelLowering.h"
     21 #include "ARMMachineFunctionInfo.h"
     22 #include "ARMSubtarget.h"
     23 #include "MCTargetDesc/ARMAddressingModes.h"
     24 #include "llvm/ADT/STLExtras.h"
     25 #include "llvm/CodeGen/Analysis.h"
     26 #include "llvm/CodeGen/FastISel.h"
     27 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     28 #include "llvm/CodeGen/MachineConstantPool.h"
     29 #include "llvm/CodeGen/MachineFrameInfo.h"
     30 #include "llvm/CodeGen/MachineInstrBuilder.h"
     31 #include "llvm/CodeGen/MachineMemOperand.h"
     32 #include "llvm/CodeGen/MachineModuleInfo.h"
     33 #include "llvm/CodeGen/MachineRegisterInfo.h"
     34 #include "llvm/IR/CallSite.h"
     35 #include "llvm/IR/CallingConv.h"
     36 #include "llvm/IR/DataLayout.h"
     37 #include "llvm/IR/DerivedTypes.h"
     38 #include "llvm/IR/GetElementPtrTypeIterator.h"
     39 #include "llvm/IR/GlobalVariable.h"
     40 #include "llvm/IR/Instructions.h"
     41 #include "llvm/IR/IntrinsicInst.h"
     42 #include "llvm/IR/Module.h"
     43 #include "llvm/IR/Operator.h"
     44 #include "llvm/Support/CommandLine.h"
     45 #include "llvm/Support/ErrorHandling.h"
     46 #include "llvm/Target/TargetInstrInfo.h"
     47 #include "llvm/Target/TargetLowering.h"
     48 #include "llvm/Target/TargetMachine.h"
     49 #include "llvm/Target/TargetOptions.h"
     50 using namespace llvm;
     51 
     52 extern cl::opt<bool> EnableARMLongCalls;
     53 
     54 namespace {
     55 
     56   // All possible address modes, plus some.
     57   typedef struct Address {
     58     enum {
     59       RegBase,
     60       FrameIndexBase
     61     } BaseType;
     62 
     63     union {
     64       unsigned Reg;
     65       int FI;
     66     } Base;
     67 
     68     int Offset;
     69 
     70     // Innocuous defaults for our address.
     71     Address()
     72      : BaseType(RegBase), Offset(0) {
     73        Base.Reg = 0;
     74      }
     75   } Address;
     76 
     77 class ARMFastISel final : public FastISel {
     78 
     79   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
     80   /// make the right decision when generating code for different targets.
     81   const ARMSubtarget *Subtarget;
     82   Module &M;
     83   const TargetMachine &TM;
     84   const TargetInstrInfo &TII;
     85   const TargetLowering &TLI;
     86   ARMFunctionInfo *AFI;
     87 
     88   // Convenience variables to avoid some queries.
     89   bool isThumb2;
     90   LLVMContext *Context;
     91 
     92   public:
     93     explicit ARMFastISel(FunctionLoweringInfo &funcInfo,
     94                          const TargetLibraryInfo *libInfo)
     95         : FastISel(funcInfo, libInfo),
     96           Subtarget(
     97               &static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget())),
     98           M(const_cast<Module &>(*funcInfo.Fn->getParent())),
     99           TM(funcInfo.MF->getTarget()), TII(*Subtarget->getInstrInfo()),
    100           TLI(*Subtarget->getTargetLowering()) {
    101       AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
    102       isThumb2 = AFI->isThumbFunction();
    103       Context = &funcInfo.Fn->getContext();
    104     }
    105 
    106     // Code from FastISel.cpp.
    107   private:
    108     unsigned fastEmitInst_r(unsigned MachineInstOpcode,
    109                             const TargetRegisterClass *RC,
    110                             unsigned Op0, bool Op0IsKill);
    111     unsigned fastEmitInst_rr(unsigned MachineInstOpcode,
    112                              const TargetRegisterClass *RC,
    113                              unsigned Op0, bool Op0IsKill,
    114                              unsigned Op1, bool Op1IsKill);
    115     unsigned fastEmitInst_rrr(unsigned MachineInstOpcode,
    116                               const TargetRegisterClass *RC,
    117                               unsigned Op0, bool Op0IsKill,
    118                               unsigned Op1, bool Op1IsKill,
    119                               unsigned Op2, bool Op2IsKill);
    120     unsigned fastEmitInst_ri(unsigned MachineInstOpcode,
    121                              const TargetRegisterClass *RC,
    122                              unsigned Op0, bool Op0IsKill,
    123                              uint64_t Imm);
    124     unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
    125                               const TargetRegisterClass *RC,
    126                               unsigned Op0, bool Op0IsKill,
    127                               unsigned Op1, bool Op1IsKill,
    128                               uint64_t Imm);
    129     unsigned fastEmitInst_i(unsigned MachineInstOpcode,
    130                             const TargetRegisterClass *RC,
    131                             uint64_t Imm);
    132 
    133     // Backend specific FastISel code.
    134   private:
    135     bool fastSelectInstruction(const Instruction *I) override;
    136     unsigned fastMaterializeConstant(const Constant *C) override;
    137     unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
    138     bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
    139                              const LoadInst *LI) override;
    140     bool fastLowerArguments() override;
    141   private:
    142   #include "ARMGenFastISel.inc"
    143 
    144     // Instruction selection routines.
    145   private:
    146     bool SelectLoad(const Instruction *I);
    147     bool SelectStore(const Instruction *I);
    148     bool SelectBranch(const Instruction *I);
    149     bool SelectIndirectBr(const Instruction *I);
    150     bool SelectCmp(const Instruction *I);
    151     bool SelectFPExt(const Instruction *I);
    152     bool SelectFPTrunc(const Instruction *I);
    153     bool SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode);
    154     bool SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode);
    155     bool SelectIToFP(const Instruction *I, bool isSigned);
    156     bool SelectFPToI(const Instruction *I, bool isSigned);
    157     bool SelectDiv(const Instruction *I, bool isSigned);
    158     bool SelectRem(const Instruction *I, bool isSigned);
    159     bool SelectCall(const Instruction *I, const char *IntrMemName);
    160     bool SelectIntrinsicCall(const IntrinsicInst &I);
    161     bool SelectSelect(const Instruction *I);
    162     bool SelectRet(const Instruction *I);
    163     bool SelectTrunc(const Instruction *I);
    164     bool SelectIntExt(const Instruction *I);
    165     bool SelectShift(const Instruction *I, ARM_AM::ShiftOpc ShiftTy);
    166 
    167     // Utility routines.
    168   private:
    169     bool isTypeLegal(Type *Ty, MVT &VT);
    170     bool isLoadTypeLegal(Type *Ty, MVT &VT);
    171     bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
    172                     bool isZExt);
    173     bool ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    174                      unsigned Alignment = 0, bool isZExt = true,
    175                      bool allocReg = true);
    176     bool ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
    177                       unsigned Alignment = 0);
    178     bool ARMComputeAddress(const Value *Obj, Address &Addr);
    179     void ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3);
    180     bool ARMIsMemCpySmall(uint64_t Len);
    181     bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
    182                                unsigned Alignment);
    183     unsigned ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
    184     unsigned ARMMaterializeFP(const ConstantFP *CFP, MVT VT);
    185     unsigned ARMMaterializeInt(const Constant *C, MVT VT);
    186     unsigned ARMMaterializeGV(const GlobalValue *GV, MVT VT);
    187     unsigned ARMMoveToFPReg(MVT VT, unsigned SrcReg);
    188     unsigned ARMMoveToIntReg(MVT VT, unsigned SrcReg);
    189     unsigned ARMSelectCallOp(bool UseReg);
    190     unsigned ARMLowerPICELF(const GlobalValue *GV, unsigned Align, MVT VT);
    191 
    192     const TargetLowering *getTargetLowering() { return &TLI; }
    193 
    194     // Call handling routines.
    195   private:
    196     CCAssignFn *CCAssignFnForCall(CallingConv::ID CC,
    197                                   bool Return,
    198                                   bool isVarArg);
    199     bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
    200                          SmallVectorImpl<unsigned> &ArgRegs,
    201                          SmallVectorImpl<MVT> &ArgVTs,
    202                          SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
    203                          SmallVectorImpl<unsigned> &RegArgs,
    204                          CallingConv::ID CC,
    205                          unsigned &NumBytes,
    206                          bool isVarArg);
    207     unsigned getLibcallReg(const Twine &Name);
    208     bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
    209                     const Instruction *I, CallingConv::ID CC,
    210                     unsigned &NumBytes, bool isVarArg);
    211     bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);
    212 
    213     // OptionalDef handling routines.
    214   private:
    215     bool isARMNEONPred(const MachineInstr *MI);
    216     bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    217     const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    218     void AddLoadStoreOperands(MVT VT, Address &Addr,
    219                               const MachineInstrBuilder &MIB,
    220                               unsigned Flags, bool useAM3);
    221 };
    222 
    223 } // end anonymous namespace
    224 
    225 #include "ARMGenCallingConv.inc"
    226 
    227 // DefinesOptionalPredicate - This is different from DefinesPredicate in that
    228 // we don't care about implicit defs here, just places we'll need to add a
    229 // default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
    230 bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
    231   if (!MI->hasOptionalDef())
    232     return false;
    233 
    234   // Look to see if our OptionalDef is defining CPSR or CCR.
    235   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    236     const MachineOperand &MO = MI->getOperand(i);
    237     if (!MO.isReg() || !MO.isDef()) continue;
    238     if (MO.getReg() == ARM::CPSR)
    239       *CPSR = true;
    240   }
    241   return true;
    242 }
    243 
    244 bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
    245   const MCInstrDesc &MCID = MI->getDesc();
    246 
    247   // If we're a thumb2 or not NEON function we'll be handled via isPredicable.
    248   if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
    249        AFI->isThumb2Function())
    250     return MI->isPredicable();
    251 
    252   for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    253     if (MCID.OpInfo[i].isPredicate())
    254       return true;
    255 
    256   return false;
    257 }
    258 
    259 // If the machine is predicable go ahead and add the predicate operands, if
    260 // it needs default CC operands add those.
    261 // TODO: If we want to support thumb1 then we'll need to deal with optional
    262 // CPSR defs that need to be added before the remaining operands. See s_cc_out
    263 // for descriptions why.
    264 const MachineInstrBuilder &
    265 ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
    266   MachineInstr *MI = &*MIB;
    267 
    268   // Do we use a predicate? or...
    269   // Are we NEON in ARM mode and have a predicate operand? If so, I know
    270   // we're not predicable but add it anyways.
    271   if (isARMNEONPred(MI))
    272     AddDefaultPred(MIB);
    273 
    274   // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
    275   // defines CPSR. All other OptionalDefines in ARM are the CCR register.
    276   bool CPSR = false;
    277   if (DefinesOptionalPredicate(MI, &CPSR)) {
    278     if (CPSR)
    279       AddDefaultT1CC(MIB);
    280     else
    281       AddDefaultCC(MIB);
    282   }
    283   return MIB;
    284 }
    285 
    286 unsigned ARMFastISel::fastEmitInst_r(unsigned MachineInstOpcode,
    287                                      const TargetRegisterClass *RC,
    288                                      unsigned Op0, bool Op0IsKill) {
    289   unsigned ResultReg = createResultReg(RC);
    290   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    291 
    292   // Make sure the input operand is sufficiently constrained to be legal
    293   // for this instruction.
    294   Op0 = constrainOperandRegClass(II, Op0, 1);
    295   if (II.getNumDefs() >= 1) {
    296     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
    297                             ResultReg).addReg(Op0, Op0IsKill * RegState::Kill));
    298   } else {
    299     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    300                    .addReg(Op0, Op0IsKill * RegState::Kill));
    301     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    302                    TII.get(TargetOpcode::COPY), ResultReg)
    303                    .addReg(II.ImplicitDefs[0]));
    304   }
    305   return ResultReg;
    306 }
    307 
    308 unsigned ARMFastISel::fastEmitInst_rr(unsigned MachineInstOpcode,
    309                                       const TargetRegisterClass *RC,
    310                                       unsigned Op0, bool Op0IsKill,
    311                                       unsigned Op1, bool Op1IsKill) {
    312   unsigned ResultReg = createResultReg(RC);
    313   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    314 
    315   // Make sure the input operands are sufficiently constrained to be legal
    316   // for this instruction.
    317   Op0 = constrainOperandRegClass(II, Op0, 1);
    318   Op1 = constrainOperandRegClass(II, Op1, 2);
    319 
    320   if (II.getNumDefs() >= 1) {
    321     AddOptionalDefs(
    322         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    323             .addReg(Op0, Op0IsKill * RegState::Kill)
    324             .addReg(Op1, Op1IsKill * RegState::Kill));
    325   } else {
    326     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    327                    .addReg(Op0, Op0IsKill * RegState::Kill)
    328                    .addReg(Op1, Op1IsKill * RegState::Kill));
    329     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    330                            TII.get(TargetOpcode::COPY), ResultReg)
    331                    .addReg(II.ImplicitDefs[0]));
    332   }
    333   return ResultReg;
    334 }
    335 
    336 unsigned ARMFastISel::fastEmitInst_rrr(unsigned MachineInstOpcode,
    337                                        const TargetRegisterClass *RC,
    338                                        unsigned Op0, bool Op0IsKill,
    339                                        unsigned Op1, bool Op1IsKill,
    340                                        unsigned Op2, bool Op2IsKill) {
    341   unsigned ResultReg = createResultReg(RC);
    342   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    343 
    344   // Make sure the input operands are sufficiently constrained to be legal
    345   // for this instruction.
    346   Op0 = constrainOperandRegClass(II, Op0, 1);
    347   Op1 = constrainOperandRegClass(II, Op1, 2);
    348   Op2 = constrainOperandRegClass(II, Op1, 3);
    349 
    350   if (II.getNumDefs() >= 1) {
    351     AddOptionalDefs(
    352         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    353             .addReg(Op0, Op0IsKill * RegState::Kill)
    354             .addReg(Op1, Op1IsKill * RegState::Kill)
    355             .addReg(Op2, Op2IsKill * RegState::Kill));
    356   } else {
    357     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    358                    .addReg(Op0, Op0IsKill * RegState::Kill)
    359                    .addReg(Op1, Op1IsKill * RegState::Kill)
    360                    .addReg(Op2, Op2IsKill * RegState::Kill));
    361     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    362                            TII.get(TargetOpcode::COPY), ResultReg)
    363                    .addReg(II.ImplicitDefs[0]));
    364   }
    365   return ResultReg;
    366 }
    367 
    368 unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
    369                                       const TargetRegisterClass *RC,
    370                                       unsigned Op0, bool Op0IsKill,
    371                                       uint64_t Imm) {
    372   unsigned ResultReg = createResultReg(RC);
    373   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    374 
    375   // Make sure the input operand is sufficiently constrained to be legal
    376   // for this instruction.
    377   Op0 = constrainOperandRegClass(II, Op0, 1);
    378   if (II.getNumDefs() >= 1) {
    379     AddOptionalDefs(
    380         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    381             .addReg(Op0, Op0IsKill * RegState::Kill)
    382             .addImm(Imm));
    383   } else {
    384     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    385                    .addReg(Op0, Op0IsKill * RegState::Kill)
    386                    .addImm(Imm));
    387     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    388                            TII.get(TargetOpcode::COPY), ResultReg)
    389                    .addReg(II.ImplicitDefs[0]));
    390   }
    391   return ResultReg;
    392 }
    393 
    394 unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
    395                                        const TargetRegisterClass *RC,
    396                                        unsigned Op0, bool Op0IsKill,
    397                                        unsigned Op1, bool Op1IsKill,
    398                                        uint64_t Imm) {
    399   unsigned ResultReg = createResultReg(RC);
    400   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    401 
    402   // Make sure the input operands are sufficiently constrained to be legal
    403   // for this instruction.
    404   Op0 = constrainOperandRegClass(II, Op0, 1);
    405   Op1 = constrainOperandRegClass(II, Op1, 2);
    406   if (II.getNumDefs() >= 1) {
    407     AddOptionalDefs(
    408         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
    409             .addReg(Op0, Op0IsKill * RegState::Kill)
    410             .addReg(Op1, Op1IsKill * RegState::Kill)
    411             .addImm(Imm));
    412   } else {
    413     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    414                    .addReg(Op0, Op0IsKill * RegState::Kill)
    415                    .addReg(Op1, Op1IsKill * RegState::Kill)
    416                    .addImm(Imm));
    417     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    418                            TII.get(TargetOpcode::COPY), ResultReg)
    419                    .addReg(II.ImplicitDefs[0]));
    420   }
    421   return ResultReg;
    422 }
    423 
    424 unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
    425                                      const TargetRegisterClass *RC,
    426                                      uint64_t Imm) {
    427   unsigned ResultReg = createResultReg(RC);
    428   const MCInstrDesc &II = TII.get(MachineInstOpcode);
    429 
    430   if (II.getNumDefs() >= 1) {
    431     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II,
    432                             ResultReg).addImm(Imm));
    433   } else {
    434     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
    435                    .addImm(Imm));
    436     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    437                            TII.get(TargetOpcode::COPY), ResultReg)
    438                    .addReg(II.ImplicitDefs[0]));
    439   }
    440   return ResultReg;
    441 }
    442 
    443 // TODO: Don't worry about 64-bit now, but when this is fixed remove the
    444 // checks from the various callers.
    445 unsigned ARMFastISel::ARMMoveToFPReg(MVT VT, unsigned SrcReg) {
    446   if (VT == MVT::f64) return 0;
    447 
    448   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
    449   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    450                           TII.get(ARM::VMOVSR), MoveReg)
    451                   .addReg(SrcReg));
    452   return MoveReg;
    453 }
    454 
    455 unsigned ARMFastISel::ARMMoveToIntReg(MVT VT, unsigned SrcReg) {
    456   if (VT == MVT::i64) return 0;
    457 
    458   unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
    459   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    460                           TII.get(ARM::VMOVRS), MoveReg)
    461                   .addReg(SrcReg));
    462   return MoveReg;
    463 }
    464 
    465 // For double width floating point we need to materialize two constants
    466 // (the high and the low) into integer registers then use a move to get
    467 // the combined constant into an FP reg.
    468 unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, MVT VT) {
    469   const APFloat Val = CFP->getValueAPF();
    470   bool is64bit = VT == MVT::f64;
    471 
    472   // This checks to see if we can use VFP3 instructions to materialize
    473   // a constant, otherwise we have to go through the constant pool.
    474   if (TLI.isFPImmLegal(Val, VT)) {
    475     int Imm;
    476     unsigned Opc;
    477     if (is64bit) {
    478       Imm = ARM_AM::getFP64Imm(Val);
    479       Opc = ARM::FCONSTD;
    480     } else {
    481       Imm = ARM_AM::getFP32Imm(Val);
    482       Opc = ARM::FCONSTS;
    483     }
    484     unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    485     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    486                             TII.get(Opc), DestReg).addImm(Imm));
    487     return DestReg;
    488   }
    489 
    490   // Require VFP2 for loading fp constants.
    491   if (!Subtarget->hasVFP2()) return false;
    492 
    493   // MachineConstantPool wants an explicit alignment.
    494   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
    495   if (Align == 0) {
    496     // TODO: Figure out if this is correct.
    497     Align = DL.getTypeAllocSize(CFP->getType());
    498   }
    499   unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
    500   unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    501   unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;
    502 
    503   // The extra reg is for addrmode5.
    504   AddOptionalDefs(
    505       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
    506           .addConstantPoolIndex(Idx)
    507           .addReg(0));
    508   return DestReg;
    509 }
    510 
    511 unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
    512 
    513   if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1)
    514     return 0;
    515 
    516   // If we can do this in a single instruction without a constant pool entry
    517   // do so now.
    518   const ConstantInt *CI = cast<ConstantInt>(C);
    519   if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) {
    520     unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16;
    521     const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
    522       &ARM::GPRRegClass;
    523     unsigned ImmReg = createResultReg(RC);
    524     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    525                             TII.get(Opc), ImmReg)
    526                     .addImm(CI->getZExtValue()));
    527     return ImmReg;
    528   }
    529 
    530   // Use MVN to emit negative constants.
    531   if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) {
    532     unsigned Imm = (unsigned)~(CI->getSExtValue());
    533     bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
    534       (ARM_AM::getSOImmVal(Imm) != -1);
    535     if (UseImm) {
    536       unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi;
    537       const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass :
    538                                                  &ARM::GPRRegClass;
    539       unsigned ImmReg = createResultReg(RC);
    540       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    541                               TII.get(Opc), ImmReg)
    542                       .addImm(Imm));
    543       return ImmReg;
    544     }
    545   }
    546 
    547   unsigned ResultReg = 0;
    548   if (Subtarget->useMovt(*FuncInfo.MF))
    549     ResultReg = fastEmit_i(VT, VT, ISD::Constant, CI->getZExtValue());
    550 
    551   if (ResultReg)
    552     return ResultReg;
    553 
    554   // Load from constant pool.  For now 32-bit only.
    555   if (VT != MVT::i32)
    556     return 0;
    557 
    558   // MachineConstantPool wants an explicit alignment.
    559   unsigned Align = DL.getPrefTypeAlignment(C->getType());
    560   if (Align == 0) {
    561     // TODO: Figure out if this is correct.
    562     Align = DL.getTypeAllocSize(C->getType());
    563   }
    564   unsigned Idx = MCP.getConstantPoolIndex(C, Align);
    565   ResultReg = createResultReg(TLI.getRegClassFor(VT));
    566   if (isThumb2)
    567     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    568                             TII.get(ARM::t2LDRpci), ResultReg)
    569                       .addConstantPoolIndex(Idx));
    570   else {
    571     // The extra immediate is for addrmode2.
    572     ResultReg = constrainOperandRegClass(TII.get(ARM::LDRcp), ResultReg, 0);
    573     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    574                             TII.get(ARM::LDRcp), ResultReg)
    575                       .addConstantPoolIndex(Idx)
    576                       .addImm(0));
    577   }
    578   return ResultReg;
    579 }
    580 
    581 unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
    582   // For now 32-bit only.
    583   if (VT != MVT::i32) return 0;
    584 
    585   Reloc::Model RelocM = TM.getRelocationModel();
    586   bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
    587   const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
    588                                            : &ARM::GPRRegClass;
    589   unsigned DestReg = createResultReg(RC);
    590 
    591   // FastISel TLS support on non-MachO is broken, punt to SelectionDAG.
    592   const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
    593   bool IsThreadLocal = GVar && GVar->isThreadLocal();
    594   if (!Subtarget->isTargetMachO() && IsThreadLocal) return 0;
    595 
    596   // Use movw+movt when possible, it avoids constant pool entries.
    597   // Non-darwin targets only support static movt relocations in FastISel.
    598   if (Subtarget->useMovt(*FuncInfo.MF) &&
    599       (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) {
    600     unsigned Opc;
    601     unsigned char TF = 0;
    602     if (Subtarget->isTargetMachO())
    603       TF = ARMII::MO_NONLAZY;
    604 
    605     switch (RelocM) {
    606     case Reloc::PIC_:
    607       Opc = isThumb2 ? ARM::t2MOV_ga_pcrel : ARM::MOV_ga_pcrel;
    608       break;
    609     default:
    610       Opc = isThumb2 ? ARM::t2MOVi32imm : ARM::MOVi32imm;
    611       break;
    612     }
    613     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    614                             TII.get(Opc), DestReg).addGlobalAddress(GV, 0, TF));
    615   } else {
    616     // MachineConstantPool wants an explicit alignment.
    617     unsigned Align = DL.getPrefTypeAlignment(GV->getType());
    618     if (Align == 0) {
    619       // TODO: Figure out if this is correct.
    620       Align = DL.getTypeAllocSize(GV->getType());
    621     }
    622 
    623     if (Subtarget->isTargetELF() && RelocM == Reloc::PIC_)
    624       return ARMLowerPICELF(GV, Align, VT);
    625 
    626     // Grab index.
    627     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 :
    628       (Subtarget->isThumb() ? 4 : 8);
    629     unsigned Id = AFI->createPICLabelUId();
    630     ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
    631                                                                 ARMCP::CPValue,
    632                                                                 PCAdj);
    633     unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
    634 
    635     // Load value.
    636     MachineInstrBuilder MIB;
    637     if (isThumb2) {
    638       unsigned Opc = (RelocM!=Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
    639       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
    640                     DestReg).addConstantPoolIndex(Idx);
    641       if (RelocM == Reloc::PIC_)
    642         MIB.addImm(Id);
    643       AddOptionalDefs(MIB);
    644     } else {
    645       // The extra immediate is for addrmode2.
    646       DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0);
    647       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    648                     TII.get(ARM::LDRcp), DestReg)
    649                 .addConstantPoolIndex(Idx)
    650                 .addImm(0);
    651       AddOptionalDefs(MIB);
    652 
    653       if (RelocM == Reloc::PIC_) {
    654         unsigned Opc = IsIndirect ? ARM::PICLDR : ARM::PICADD;
    655         unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    656 
    657         MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
    658                                           DbgLoc, TII.get(Opc), NewDestReg)
    659                                   .addReg(DestReg)
    660                                   .addImm(Id);
    661         AddOptionalDefs(MIB);
    662         return NewDestReg;
    663       }
    664     }
    665   }
    666 
    667   if (IsIndirect) {
    668     MachineInstrBuilder MIB;
    669     unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    670     if (isThumb2)
    671       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    672                     TII.get(ARM::t2LDRi12), NewDestReg)
    673             .addReg(DestReg)
    674             .addImm(0);
    675     else
    676       MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    677                     TII.get(ARM::LDRi12), NewDestReg)
    678                 .addReg(DestReg)
    679                 .addImm(0);
    680     DestReg = NewDestReg;
    681     AddOptionalDefs(MIB);
    682   }
    683 
    684   return DestReg;
    685 }
    686 
    687 unsigned ARMFastISel::fastMaterializeConstant(const Constant *C) {
    688   EVT CEVT = TLI.getValueType(C->getType(), true);
    689 
    690   // Only handle simple types.
    691   if (!CEVT.isSimple()) return 0;
    692   MVT VT = CEVT.getSimpleVT();
    693 
    694   if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    695     return ARMMaterializeFP(CFP, VT);
    696   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    697     return ARMMaterializeGV(GV, VT);
    698   else if (isa<ConstantInt>(C))
    699     return ARMMaterializeInt(C, VT);
    700 
    701   return 0;
    702 }
    703 
    704 // TODO: unsigned ARMFastISel::TargetMaterializeFloatZero(const ConstantFP *CF);
    705 
    706 unsigned ARMFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
    707   // Don't handle dynamic allocas.
    708   if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;
    709 
    710   MVT VT;
    711   if (!isLoadTypeLegal(AI->getType(), VT)) return 0;
    712 
    713   DenseMap<const AllocaInst*, int>::iterator SI =
    714     FuncInfo.StaticAllocaMap.find(AI);
    715 
    716   // This will get lowered later into the correct offsets and registers
    717   // via rewriteXFrameIndex.
    718   if (SI != FuncInfo.StaticAllocaMap.end()) {
    719     unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    720     const TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    721     unsigned ResultReg = createResultReg(RC);
    722     ResultReg = constrainOperandRegClass(TII.get(Opc), ResultReg, 0);
    723 
    724     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    725                             TII.get(Opc), ResultReg)
    726                             .addFrameIndex(SI->second)
    727                             .addImm(0));
    728     return ResultReg;
    729   }
    730 
    731   return 0;
    732 }
    733 
    734 bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
    735   EVT evt = TLI.getValueType(Ty, true);
    736 
    737   // Only handle simple types.
    738   if (evt == MVT::Other || !evt.isSimple()) return false;
    739   VT = evt.getSimpleVT();
    740 
    741   // Handle all legal types, i.e. a register that will directly hold this
    742   // value.
    743   return TLI.isTypeLegal(VT);
    744 }
    745 
    746 bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
    747   if (isTypeLegal(Ty, VT)) return true;
    748 
    749   // If this is a type than can be sign or zero-extended to a basic operation
    750   // go ahead and accept it now.
    751   if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16)
    752     return true;
    753 
    754   return false;
    755 }
    756 
    757 // Computes the address to get to an object.
    758 bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
    759   // Some boilerplate from the X86 FastISel.
    760   const User *U = nullptr;
    761   unsigned Opcode = Instruction::UserOp1;
    762   if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    763     // Don't walk into other basic blocks unless the object is an alloca from
    764     // another block, otherwise it may not have a virtual register assigned.
    765     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
    766         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    767       Opcode = I->getOpcode();
    768       U = I;
    769     }
    770   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    771     Opcode = C->getOpcode();
    772     U = C;
    773   }
    774 
    775   if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    776     if (Ty->getAddressSpace() > 255)
    777       // Fast instruction selection doesn't support the special
    778       // address spaces.
    779       return false;
    780 
    781   switch (Opcode) {
    782     default:
    783     break;
    784     case Instruction::BitCast:
    785       // Look through bitcasts.
    786       return ARMComputeAddress(U->getOperand(0), Addr);
    787     case Instruction::IntToPtr:
    788       // Look past no-op inttoptrs.
    789       if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
    790         return ARMComputeAddress(U->getOperand(0), Addr);
    791       break;
    792     case Instruction::PtrToInt:
    793       // Look past no-op ptrtoints.
    794       if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
    795         return ARMComputeAddress(U->getOperand(0), Addr);
    796       break;
    797     case Instruction::GetElementPtr: {
    798       Address SavedAddr = Addr;
    799       int TmpOffset = Addr.Offset;
    800 
    801       // Iterate through the GEP folding the constants into offsets where
    802       // we can.
    803       gep_type_iterator GTI = gep_type_begin(U);
    804       for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
    805            i != e; ++i, ++GTI) {
    806         const Value *Op = *i;
    807         if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    808           const StructLayout *SL = DL.getStructLayout(STy);
    809           unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
    810           TmpOffset += SL->getElementOffset(Idx);
    811         } else {
    812           uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
    813           for (;;) {
    814             if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    815               // Constant-offset addressing.
    816               TmpOffset += CI->getSExtValue() * S;
    817               break;
    818             }
    819             if (canFoldAddIntoGEP(U, Op)) {
    820               // A compatible add with a constant operand. Fold the constant.
    821               ConstantInt *CI =
    822               cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    823               TmpOffset += CI->getSExtValue() * S;
    824               // Iterate on the other operand.
    825               Op = cast<AddOperator>(Op)->getOperand(0);
    826               continue;
    827             }
    828             // Unsupported
    829             goto unsupported_gep;
    830           }
    831         }
    832       }
    833 
    834       // Try to grab the base operand now.
    835       Addr.Offset = TmpOffset;
    836       if (ARMComputeAddress(U->getOperand(0), Addr)) return true;
    837 
    838       // We failed, restore everything and try the other options.
    839       Addr = SavedAddr;
    840 
    841       unsupported_gep:
    842       break;
    843     }
    844     case Instruction::Alloca: {
    845       const AllocaInst *AI = cast<AllocaInst>(Obj);
    846       DenseMap<const AllocaInst*, int>::iterator SI =
    847         FuncInfo.StaticAllocaMap.find(AI);
    848       if (SI != FuncInfo.StaticAllocaMap.end()) {
    849         Addr.BaseType = Address::FrameIndexBase;
    850         Addr.Base.FI = SI->second;
    851         return true;
    852       }
    853       break;
    854     }
    855   }
    856 
    857   // Try to get this in a register if nothing else has worked.
    858   if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
    859   return Addr.Base.Reg != 0;
    860 }
    861 
    862 void ARMFastISel::ARMSimplifyAddress(Address &Addr, MVT VT, bool useAM3) {
    863   bool needsLowering = false;
    864   switch (VT.SimpleTy) {
    865     default: llvm_unreachable("Unhandled load/store type!");
    866     case MVT::i1:
    867     case MVT::i8:
    868     case MVT::i16:
    869     case MVT::i32:
    870       if (!useAM3) {
    871         // Integer loads/stores handle 12-bit offsets.
    872         needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
    873         // Handle negative offsets.
    874         if (needsLowering && isThumb2)
    875           needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 &&
    876                             Addr.Offset > -256);
    877       } else {
    878         // ARM halfword load/stores and signed byte loads use +/-imm8 offsets.
    879         needsLowering = (Addr.Offset > 255 || Addr.Offset < -255);
    880       }
    881       break;
    882     case MVT::f32:
    883     case MVT::f64:
    884       // Floating point operands handle 8-bit offsets.
    885       needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
    886       break;
    887   }
    888 
    889   // If this is a stack pointer and the offset needs to be simplified then
    890   // put the alloca address into a register, set the base type back to
    891   // register and continue. This should almost never happen.
    892   if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    893     const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
    894                                              : &ARM::GPRRegClass;
    895     unsigned ResultReg = createResultReg(RC);
    896     unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri;
    897     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    898                             TII.get(Opc), ResultReg)
    899                             .addFrameIndex(Addr.Base.FI)
    900                             .addImm(0));
    901     Addr.Base.Reg = ResultReg;
    902     Addr.BaseType = Address::RegBase;
    903   }
    904 
    905   // Since the offset is too large for the load/store instruction
    906   // get the reg+offset into a register.
    907   if (needsLowering) {
    908     Addr.Base.Reg = fastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
    909                                  /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    910     Addr.Offset = 0;
    911   }
    912 }
    913 
    914 void ARMFastISel::AddLoadStoreOperands(MVT VT, Address &Addr,
    915                                        const MachineInstrBuilder &MIB,
    916                                        unsigned Flags, bool useAM3) {
    917   // addrmode5 output depends on the selection dag addressing dividing the
    918   // offset by 4 that it then later multiplies. Do this here as well.
    919   if (VT.SimpleTy == MVT::f32 || VT.SimpleTy == MVT::f64)
    920     Addr.Offset /= 4;
    921 
    922   // Frame base works a bit differently. Handle it separately.
    923   if (Addr.BaseType == Address::FrameIndexBase) {
    924     int FI = Addr.Base.FI;
    925     int Offset = Addr.Offset;
    926     MachineMemOperand *MMO =
    927           FuncInfo.MF->getMachineMemOperand(
    928                                   MachinePointerInfo::getFixedStack(FI, Offset),
    929                                   Flags,
    930                                   MFI.getObjectSize(FI),
    931                                   MFI.getObjectAlignment(FI));
    932     // Now add the rest of the operands.
    933     MIB.addFrameIndex(FI);
    934 
    935     // ARM halfword load/stores and signed byte loads need an additional
    936     // operand.
    937     if (useAM3) {
    938       signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
    939       MIB.addReg(0);
    940       MIB.addImm(Imm);
    941     } else {
    942       MIB.addImm(Addr.Offset);
    943     }
    944     MIB.addMemOperand(MMO);
    945   } else {
    946     // Now add the rest of the operands.
    947     MIB.addReg(Addr.Base.Reg);
    948 
    949     // ARM halfword load/stores and signed byte loads need an additional
    950     // operand.
    951     if (useAM3) {
    952       signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset;
    953       MIB.addReg(0);
    954       MIB.addImm(Imm);
    955     } else {
    956       MIB.addImm(Addr.Offset);
    957     }
    958   }
    959   AddOptionalDefs(MIB);
    960 }
    961 
    962 bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
    963                               unsigned Alignment, bool isZExt, bool allocReg) {
    964   unsigned Opc;
    965   bool useAM3 = false;
    966   bool needVMOV = false;
    967   const TargetRegisterClass *RC;
    968   switch (VT.SimpleTy) {
    969     // This is mostly going to be Neon/vector support.
    970     default: return false;
    971     case MVT::i1:
    972     case MVT::i8:
    973       if (isThumb2) {
    974         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
    975           Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8;
    976         else
    977           Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12;
    978       } else {
    979         if (isZExt) {
    980           Opc = ARM::LDRBi12;
    981         } else {
    982           Opc = ARM::LDRSB;
    983           useAM3 = true;
    984         }
    985       }
    986       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
    987       break;
    988     case MVT::i16:
    989       if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
    990         return false;
    991 
    992       if (isThumb2) {
    993         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
    994           Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8;
    995         else
    996           Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12;
    997       } else {
    998         Opc = isZExt ? ARM::LDRH : ARM::LDRSH;
    999         useAM3 = true;
   1000       }
   1001       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
   1002       break;
   1003     case MVT::i32:
   1004       if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
   1005         return false;
   1006 
   1007       if (isThumb2) {
   1008         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1009           Opc = ARM::t2LDRi8;
   1010         else
   1011           Opc = ARM::t2LDRi12;
   1012       } else {
   1013         Opc = ARM::LDRi12;
   1014       }
   1015       RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
   1016       break;
   1017     case MVT::f32:
   1018       if (!Subtarget->hasVFP2()) return false;
   1019       // Unaligned loads need special handling. Floats require word-alignment.
   1020       if (Alignment && Alignment < 4) {
   1021         needVMOV = true;
   1022         VT = MVT::i32;
   1023         Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
   1024         RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass;
   1025       } else {
   1026         Opc = ARM::VLDRS;
   1027         RC = TLI.getRegClassFor(VT);
   1028       }
   1029       break;
   1030     case MVT::f64:
   1031       if (!Subtarget->hasVFP2()) return false;
   1032       // FIXME: Unaligned loads need special handling.  Doublewords require
   1033       // word-alignment.
   1034       if (Alignment && Alignment < 4)
   1035         return false;
   1036 
   1037       Opc = ARM::VLDRD;
   1038       RC = TLI.getRegClassFor(VT);
   1039       break;
   1040   }
   1041   // Simplify this down to something we can handle.
   1042   ARMSimplifyAddress(Addr, VT, useAM3);
   1043 
   1044   // Create the base instruction, then add the operands.
   1045   if (allocReg)
   1046     ResultReg = createResultReg(RC);
   1047   assert (ResultReg > 255 && "Expected an allocated virtual register.");
   1048   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1049                                     TII.get(Opc), ResultReg);
   1050   AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3);
   1051 
   1052   // If we had an unaligned load of a float we've converted it to an regular
   1053   // load.  Now we must move from the GRP to the FP register.
   1054   if (needVMOV) {
   1055     unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::f32));
   1056     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1057                             TII.get(ARM::VMOVSR), MoveReg)
   1058                     .addReg(ResultReg));
   1059     ResultReg = MoveReg;
   1060   }
   1061   return true;
   1062 }
   1063 
   1064 bool ARMFastISel::SelectLoad(const Instruction *I) {
   1065   // Atomic loads need special handling.
   1066   if (cast<LoadInst>(I)->isAtomic())
   1067     return false;
   1068 
   1069   // Verify we have a legal type before going any further.
   1070   MVT VT;
   1071   if (!isLoadTypeLegal(I->getType(), VT))
   1072     return false;
   1073 
   1074   // See if we can handle this address.
   1075   Address Addr;
   1076   if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;
   1077 
   1078   unsigned ResultReg;
   1079   if (!ARMEmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment()))
   1080     return false;
   1081   updateValueMap(I, ResultReg);
   1082   return true;
   1083 }
   1084 
   1085 bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
   1086                                unsigned Alignment) {
   1087   unsigned StrOpc;
   1088   bool useAM3 = false;
   1089   switch (VT.SimpleTy) {
   1090     // This is mostly going to be Neon/vector support.
   1091     default: return false;
   1092     case MVT::i1: {
   1093       unsigned Res = createResultReg(isThumb2 ? &ARM::tGPRRegClass
   1094                                               : &ARM::GPRRegClass);
   1095       unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri;
   1096       SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1);
   1097       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1098                               TII.get(Opc), Res)
   1099                       .addReg(SrcReg).addImm(1));
   1100       SrcReg = Res;
   1101     } // Fallthrough here.
   1102     case MVT::i8:
   1103       if (isThumb2) {
   1104         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1105           StrOpc = ARM::t2STRBi8;
   1106         else
   1107           StrOpc = ARM::t2STRBi12;
   1108       } else {
   1109         StrOpc = ARM::STRBi12;
   1110       }
   1111       break;
   1112     case MVT::i16:
   1113       if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem())
   1114         return false;
   1115 
   1116       if (isThumb2) {
   1117         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1118           StrOpc = ARM::t2STRHi8;
   1119         else
   1120           StrOpc = ARM::t2STRHi12;
   1121       } else {
   1122         StrOpc = ARM::STRH;
   1123         useAM3 = true;
   1124       }
   1125       break;
   1126     case MVT::i32:
   1127       if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem())
   1128         return false;
   1129 
   1130       if (isThumb2) {
   1131         if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
   1132           StrOpc = ARM::t2STRi8;
   1133         else
   1134           StrOpc = ARM::t2STRi12;
   1135       } else {
   1136         StrOpc = ARM::STRi12;
   1137       }
   1138       break;
   1139     case MVT::f32:
   1140       if (!Subtarget->hasVFP2()) return false;
   1141       // Unaligned stores need special handling. Floats require word-alignment.
   1142       if (Alignment && Alignment < 4) {
   1143         unsigned MoveReg = createResultReg(TLI.getRegClassFor(MVT::i32));
   1144         AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1145                                 TII.get(ARM::VMOVRS), MoveReg)
   1146                         .addReg(SrcReg));
   1147         SrcReg = MoveReg;
   1148         VT = MVT::i32;
   1149         StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12;
   1150       } else {
   1151         StrOpc = ARM::VSTRS;
   1152       }
   1153       break;
   1154     case MVT::f64:
   1155       if (!Subtarget->hasVFP2()) return false;
   1156       // FIXME: Unaligned stores need special handling.  Doublewords require
   1157       // word-alignment.
   1158       if (Alignment && Alignment < 4)
   1159           return false;
   1160 
   1161       StrOpc = ARM::VSTRD;
   1162       break;
   1163   }
   1164   // Simplify this down to something we can handle.
   1165   ARMSimplifyAddress(Addr, VT, useAM3);
   1166 
   1167   // Create the base instruction, then add the operands.
   1168   SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0);
   1169   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1170                                     TII.get(StrOpc))
   1171                             .addReg(SrcReg);
   1172   AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3);
   1173   return true;
   1174 }
   1175 
   1176 bool ARMFastISel::SelectStore(const Instruction *I) {
   1177   Value *Op0 = I->getOperand(0);
   1178   unsigned SrcReg = 0;
   1179 
   1180   // Atomic stores need special handling.
   1181   if (cast<StoreInst>(I)->isAtomic())
   1182     return false;
   1183 
   1184   // Verify we have a legal type before going any further.
   1185   MVT VT;
   1186   if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
   1187     return false;
   1188 
   1189   // Get the value to be stored into a register.
   1190   SrcReg = getRegForValue(Op0);
   1191   if (SrcReg == 0) return false;
   1192 
   1193   // See if we can handle this address.
   1194   Address Addr;
   1195   if (!ARMComputeAddress(I->getOperand(1), Addr))
   1196     return false;
   1197 
   1198   if (!ARMEmitStore(VT, SrcReg, Addr, cast<StoreInst>(I)->getAlignment()))
   1199     return false;
   1200   return true;
   1201 }
   1202 
   1203 static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
   1204   switch (Pred) {
   1205     // Needs two compares...
   1206     case CmpInst::FCMP_ONE:
   1207     case CmpInst::FCMP_UEQ:
   1208     default:
   1209       // AL is our "false" for now. The other two need more compares.
   1210       return ARMCC::AL;
   1211     case CmpInst::ICMP_EQ:
   1212     case CmpInst::FCMP_OEQ:
   1213       return ARMCC::EQ;
   1214     case CmpInst::ICMP_SGT:
   1215     case CmpInst::FCMP_OGT:
   1216       return ARMCC::GT;
   1217     case CmpInst::ICMP_SGE:
   1218     case CmpInst::FCMP_OGE:
   1219       return ARMCC::GE;
   1220     case CmpInst::ICMP_UGT:
   1221     case CmpInst::FCMP_UGT:
   1222       return ARMCC::HI;
   1223     case CmpInst::FCMP_OLT:
   1224       return ARMCC::MI;
   1225     case CmpInst::ICMP_ULE:
   1226     case CmpInst::FCMP_OLE:
   1227       return ARMCC::LS;
   1228     case CmpInst::FCMP_ORD:
   1229       return ARMCC::VC;
   1230     case CmpInst::FCMP_UNO:
   1231       return ARMCC::VS;
   1232     case CmpInst::FCMP_UGE:
   1233       return ARMCC::PL;
   1234     case CmpInst::ICMP_SLT:
   1235     case CmpInst::FCMP_ULT:
   1236       return ARMCC::LT;
   1237     case CmpInst::ICMP_SLE:
   1238     case CmpInst::FCMP_ULE:
   1239       return ARMCC::LE;
   1240     case CmpInst::FCMP_UNE:
   1241     case CmpInst::ICMP_NE:
   1242       return ARMCC::NE;
   1243     case CmpInst::ICMP_UGE:
   1244       return ARMCC::HS;
   1245     case CmpInst::ICMP_ULT:
   1246       return ARMCC::LO;
   1247   }
   1248 }
   1249 
   1250 bool ARMFastISel::SelectBranch(const Instruction *I) {
   1251   const BranchInst *BI = cast<BranchInst>(I);
   1252   MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   1253   MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
   1254 
   1255   // Simple branch support.
   1256 
   1257   // If we can, avoid recomputing the compare - redoing it could lead to wonky
   1258   // behavior.
   1259   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
   1260     if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
   1261 
   1262       // Get the compare predicate.
   1263       // Try to take advantage of fallthrough opportunities.
   1264       CmpInst::Predicate Predicate = CI->getPredicate();
   1265       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   1266         std::swap(TBB, FBB);
   1267         Predicate = CmpInst::getInversePredicate(Predicate);
   1268       }
   1269 
   1270       ARMCC::CondCodes ARMPred = getComparePred(Predicate);
   1271 
   1272       // We may not handle every CC for now.
   1273       if (ARMPred == ARMCC::AL) return false;
   1274 
   1275       // Emit the compare.
   1276       if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
   1277         return false;
   1278 
   1279       unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
   1280       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
   1281       .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
   1282       fastEmitBranch(FBB, DbgLoc);
   1283       FuncInfo.MBB->addSuccessor(TBB);
   1284       return true;
   1285     }
   1286   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
   1287     MVT SourceVT;
   1288     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
   1289         (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
   1290       unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
   1291       unsigned OpReg = getRegForValue(TI->getOperand(0));
   1292       OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0);
   1293       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1294                               TII.get(TstOpc))
   1295                       .addReg(OpReg).addImm(1));
   1296 
   1297       unsigned CCMode = ARMCC::NE;
   1298       if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   1299         std::swap(TBB, FBB);
   1300         CCMode = ARMCC::EQ;
   1301       }
   1302 
   1303       unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
   1304       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
   1305       .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
   1306 
   1307       fastEmitBranch(FBB, DbgLoc);
   1308       FuncInfo.MBB->addSuccessor(TBB);
   1309       return true;
   1310     }
   1311   } else if (const ConstantInt *CI =
   1312              dyn_cast<ConstantInt>(BI->getCondition())) {
   1313     uint64_t Imm = CI->getZExtValue();
   1314     MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
   1315     fastEmitBranch(Target, DbgLoc);
   1316     return true;
   1317   }
   1318 
   1319   unsigned CmpReg = getRegForValue(BI->getCondition());
   1320   if (CmpReg == 0) return false;
   1321 
   1322   // We've been divorced from our compare!  Our block was split, and
   1323   // now our compare lives in a predecessor block.  We musn't
   1324   // re-compare here, as the children of the compare aren't guaranteed
   1325   // live across the block boundary (we *could* check for this).
   1326   // Regardless, the compare has been done in the predecessor block,
   1327   // and it left a value for us in a virtual register.  Ergo, we test
   1328   // the one-bit value left in the virtual register.
   1329   unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri;
   1330   CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0);
   1331   AddOptionalDefs(
   1332       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TstOpc))
   1333           .addReg(CmpReg)
   1334           .addImm(1));
   1335 
   1336   unsigned CCMode = ARMCC::NE;
   1337   if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
   1338     std::swap(TBB, FBB);
   1339     CCMode = ARMCC::EQ;
   1340   }
   1341 
   1342   unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc;
   1343   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BrOpc))
   1344                   .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
   1345   fastEmitBranch(FBB, DbgLoc);
   1346   FuncInfo.MBB->addSuccessor(TBB);
   1347   return true;
   1348 }
   1349 
   1350 bool ARMFastISel::SelectIndirectBr(const Instruction *I) {
   1351   unsigned AddrReg = getRegForValue(I->getOperand(0));
   1352   if (AddrReg == 0) return false;
   1353 
   1354   unsigned Opc = isThumb2 ? ARM::tBRIND : ARM::BX;
   1355   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1356                           TII.get(Opc)).addReg(AddrReg));
   1357 
   1358   const IndirectBrInst *IB = cast<IndirectBrInst>(I);
   1359   for (unsigned i = 0, e = IB->getNumSuccessors(); i != e; ++i)
   1360     FuncInfo.MBB->addSuccessor(FuncInfo.MBBMap[IB->getSuccessor(i)]);
   1361 
   1362   return true;
   1363 }
   1364 
   1365 bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value,
   1366                              bool isZExt) {
   1367   Type *Ty = Src1Value->getType();
   1368   EVT SrcEVT = TLI.getValueType(Ty, true);
   1369   if (!SrcEVT.isSimple()) return false;
   1370   MVT SrcVT = SrcEVT.getSimpleVT();
   1371 
   1372   bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
   1373   if (isFloat && !Subtarget->hasVFP2())
   1374     return false;
   1375 
   1376   // Check to see if the 2nd operand is a constant that we can encode directly
   1377   // in the compare.
   1378   int Imm = 0;
   1379   bool UseImm = false;
   1380   bool isNegativeImm = false;
   1381   // FIXME: At -O0 we don't have anything that canonicalizes operand order.
   1382   // Thus, Src1Value may be a ConstantInt, but we're missing it.
   1383   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) {
   1384     if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 ||
   1385         SrcVT == MVT::i1) {
   1386       const APInt &CIVal = ConstInt->getValue();
   1387       Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue();
   1388       // For INT_MIN/LONG_MIN (i.e., 0x80000000) we need to use a cmp, rather
   1389       // then a cmn, because there is no way to represent 2147483648 as a
   1390       // signed 32-bit int.
   1391       if (Imm < 0 && Imm != (int)0x80000000) {
   1392         isNegativeImm = true;
   1393         Imm = -Imm;
   1394       }
   1395       UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
   1396         (ARM_AM::getSOImmVal(Imm) != -1);
   1397     }
   1398   } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) {
   1399     if (SrcVT == MVT::f32 || SrcVT == MVT::f64)
   1400       if (ConstFP->isZero() && !ConstFP->isNegative())
   1401         UseImm = true;
   1402   }
   1403 
   1404   unsigned CmpOpc;
   1405   bool isICmp = true;
   1406   bool needsExt = false;
   1407   switch (SrcVT.SimpleTy) {
   1408     default: return false;
   1409     // TODO: Verify compares.
   1410     case MVT::f32:
   1411       isICmp = false;
   1412       CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES;
   1413       break;
   1414     case MVT::f64:
   1415       isICmp = false;
   1416       CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED;
   1417       break;
   1418     case MVT::i1:
   1419     case MVT::i8:
   1420     case MVT::i16:
   1421       needsExt = true;
   1422     // Intentional fall-through.
   1423     case MVT::i32:
   1424       if (isThumb2) {
   1425         if (!UseImm)
   1426           CmpOpc = ARM::t2CMPrr;
   1427         else
   1428           CmpOpc = isNegativeImm ? ARM::t2CMNri : ARM::t2CMPri;
   1429       } else {
   1430         if (!UseImm)
   1431           CmpOpc = ARM::CMPrr;
   1432         else
   1433           CmpOpc = isNegativeImm ? ARM::CMNri : ARM::CMPri;
   1434       }
   1435       break;
   1436   }
   1437 
   1438   unsigned SrcReg1 = getRegForValue(Src1Value);
   1439   if (SrcReg1 == 0) return false;
   1440 
   1441   unsigned SrcReg2 = 0;
   1442   if (!UseImm) {
   1443     SrcReg2 = getRegForValue(Src2Value);
   1444     if (SrcReg2 == 0) return false;
   1445   }
   1446 
   1447   // We have i1, i8, or i16, we need to either zero extend or sign extend.
   1448   if (needsExt) {
   1449     SrcReg1 = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt);
   1450     if (SrcReg1 == 0) return false;
   1451     if (!UseImm) {
   1452       SrcReg2 = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt);
   1453       if (SrcReg2 == 0) return false;
   1454     }
   1455   }
   1456 
   1457   const MCInstrDesc &II = TII.get(CmpOpc);
   1458   SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0);
   1459   if (!UseImm) {
   1460     SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1);
   1461     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
   1462                     .addReg(SrcReg1).addReg(SrcReg2));
   1463   } else {
   1464     MachineInstrBuilder MIB;
   1465     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
   1466       .addReg(SrcReg1);
   1467 
   1468     // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0.
   1469     if (isICmp)
   1470       MIB.addImm(Imm);
   1471     AddOptionalDefs(MIB);
   1472   }
   1473 
   1474   // For floating point we need to move the result to a comparison register
   1475   // that we can then use for branches.
   1476   if (Ty->isFloatTy() || Ty->isDoubleTy())
   1477     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1478                             TII.get(ARM::FMSTAT)));
   1479   return true;
   1480 }
   1481 
   1482 bool ARMFastISel::SelectCmp(const Instruction *I) {
   1483   const CmpInst *CI = cast<CmpInst>(I);
   1484 
   1485   // Get the compare predicate.
   1486   ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());
   1487 
   1488   // We may not handle every CC for now.
   1489   if (ARMPred == ARMCC::AL) return false;
   1490 
   1491   // Emit the compare.
   1492   if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned()))
   1493     return false;
   1494 
   1495   // Now set a register based on the comparison. Explicitly set the predicates
   1496   // here.
   1497   unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
   1498   const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
   1499                                            : &ARM::GPRRegClass;
   1500   unsigned DestReg = createResultReg(RC);
   1501   Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
   1502   unsigned ZeroReg = fastMaterializeConstant(Zero);
   1503   // ARMEmitCmp emits a FMSTAT when necessary, so it's always safe to use CPSR.
   1504   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc), DestReg)
   1505           .addReg(ZeroReg).addImm(1)
   1506           .addImm(ARMPred).addReg(ARM::CPSR);
   1507 
   1508   updateValueMap(I, DestReg);
   1509   return true;
   1510 }
   1511 
   1512 bool ARMFastISel::SelectFPExt(const Instruction *I) {
   1513   // Make sure we have VFP and that we're extending float to double.
   1514   if (!Subtarget->hasVFP2()) return false;
   1515 
   1516   Value *V = I->getOperand(0);
   1517   if (!I->getType()->isDoubleTy() ||
   1518       !V->getType()->isFloatTy()) return false;
   1519 
   1520   unsigned Op = getRegForValue(V);
   1521   if (Op == 0) return false;
   1522 
   1523   unsigned Result = createResultReg(&ARM::DPRRegClass);
   1524   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1525                           TII.get(ARM::VCVTDS), Result)
   1526                   .addReg(Op));
   1527   updateValueMap(I, Result);
   1528   return true;
   1529 }
   1530 
   1531 bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
   1532   // Make sure we have VFP and that we're truncating double to float.
   1533   if (!Subtarget->hasVFP2()) return false;
   1534 
   1535   Value *V = I->getOperand(0);
   1536   if (!(I->getType()->isFloatTy() &&
   1537         V->getType()->isDoubleTy())) return false;
   1538 
   1539   unsigned Op = getRegForValue(V);
   1540   if (Op == 0) return false;
   1541 
   1542   unsigned Result = createResultReg(&ARM::SPRRegClass);
   1543   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1544                           TII.get(ARM::VCVTSD), Result)
   1545                   .addReg(Op));
   1546   updateValueMap(I, Result);
   1547   return true;
   1548 }
   1549 
   1550 bool ARMFastISel::SelectIToFP(const Instruction *I, bool isSigned) {
   1551   // Make sure we have VFP.
   1552   if (!Subtarget->hasVFP2()) return false;
   1553 
   1554   MVT DstVT;
   1555   Type *Ty = I->getType();
   1556   if (!isTypeLegal(Ty, DstVT))
   1557     return false;
   1558 
   1559   Value *Src = I->getOperand(0);
   1560   EVT SrcEVT = TLI.getValueType(Src->getType(), true);
   1561   if (!SrcEVT.isSimple())
   1562     return false;
   1563   MVT SrcVT = SrcEVT.getSimpleVT();
   1564   if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
   1565     return false;
   1566 
   1567   unsigned SrcReg = getRegForValue(Src);
   1568   if (SrcReg == 0) return false;
   1569 
   1570   // Handle sign-extension.
   1571   if (SrcVT == MVT::i16 || SrcVT == MVT::i8) {
   1572     SrcReg = ARMEmitIntExt(SrcVT, SrcReg, MVT::i32,
   1573                                        /*isZExt*/!isSigned);
   1574     if (SrcReg == 0) return false;
   1575   }
   1576 
   1577   // The conversion routine works on fp-reg to fp-reg and the operand above
   1578   // was an integer, move it to the fp registers if possible.
   1579   unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg);
   1580   if (FP == 0) return false;
   1581 
   1582   unsigned Opc;
   1583   if (Ty->isFloatTy()) Opc = isSigned ? ARM::VSITOS : ARM::VUITOS;
   1584   else if (Ty->isDoubleTy()) Opc = isSigned ? ARM::VSITOD : ARM::VUITOD;
   1585   else return false;
   1586 
   1587   unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
   1588   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1589                           TII.get(Opc), ResultReg).addReg(FP));
   1590   updateValueMap(I, ResultReg);
   1591   return true;
   1592 }
   1593 
   1594 bool ARMFastISel::SelectFPToI(const Instruction *I, bool isSigned) {
   1595   // Make sure we have VFP.
   1596   if (!Subtarget->hasVFP2()) return false;
   1597 
   1598   MVT DstVT;
   1599   Type *RetTy = I->getType();
   1600   if (!isTypeLegal(RetTy, DstVT))
   1601     return false;
   1602 
   1603   unsigned Op = getRegForValue(I->getOperand(0));
   1604   if (Op == 0) return false;
   1605 
   1606   unsigned Opc;
   1607   Type *OpTy = I->getOperand(0)->getType();
   1608   if (OpTy->isFloatTy()) Opc = isSigned ? ARM::VTOSIZS : ARM::VTOUIZS;
   1609   else if (OpTy->isDoubleTy()) Opc = isSigned ? ARM::VTOSIZD : ARM::VTOUIZD;
   1610   else return false;
   1611 
   1612   // f64->s32/u32 or f32->s32/u32 both need an intermediate f32 reg.
   1613   unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
   1614   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1615                           TII.get(Opc), ResultReg).addReg(Op));
   1616 
   1617   // This result needs to be in an integer register, but the conversion only
   1618   // takes place in fp-regs.
   1619   unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
   1620   if (IntReg == 0) return false;
   1621 
   1622   updateValueMap(I, IntReg);
   1623   return true;
   1624 }
   1625 
   1626 bool ARMFastISel::SelectSelect(const Instruction *I) {
   1627   MVT VT;
   1628   if (!isTypeLegal(I->getType(), VT))
   1629     return false;
   1630 
   1631   // Things need to be register sized for register moves.
   1632   if (VT != MVT::i32) return false;
   1633 
   1634   unsigned CondReg = getRegForValue(I->getOperand(0));
   1635   if (CondReg == 0) return false;
   1636   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   1637   if (Op1Reg == 0) return false;
   1638 
   1639   // Check to see if we can use an immediate in the conditional move.
   1640   int Imm = 0;
   1641   bool UseImm = false;
   1642   bool isNegativeImm = false;
   1643   if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) {
   1644     assert (VT == MVT::i32 && "Expecting an i32.");
   1645     Imm = (int)ConstInt->getValue().getZExtValue();
   1646     if (Imm < 0) {
   1647       isNegativeImm = true;
   1648       Imm = ~Imm;
   1649     }
   1650     UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) :
   1651       (ARM_AM::getSOImmVal(Imm) != -1);
   1652   }
   1653 
   1654   unsigned Op2Reg = 0;
   1655   if (!UseImm) {
   1656     Op2Reg = getRegForValue(I->getOperand(2));
   1657     if (Op2Reg == 0) return false;
   1658   }
   1659 
   1660   unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri;
   1661   CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0);
   1662   AddOptionalDefs(
   1663       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
   1664           .addReg(CondReg)
   1665           .addImm(0));
   1666 
   1667   unsigned MovCCOpc;
   1668   const TargetRegisterClass *RC;
   1669   if (!UseImm) {
   1670     RC = isThumb2 ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
   1671     MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr;
   1672   } else {
   1673     RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass;
   1674     if (!isNegativeImm)
   1675       MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi;
   1676     else
   1677       MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi;
   1678   }
   1679   unsigned ResultReg = createResultReg(RC);
   1680   if (!UseImm) {
   1681     Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1);
   1682     Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2);
   1683     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
   1684             ResultReg)
   1685         .addReg(Op2Reg)
   1686         .addReg(Op1Reg)
   1687         .addImm(ARMCC::NE)
   1688         .addReg(ARM::CPSR);
   1689   } else {
   1690     Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1);
   1691     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovCCOpc),
   1692             ResultReg)
   1693         .addReg(Op1Reg)
   1694         .addImm(Imm)
   1695         .addImm(ARMCC::EQ)
   1696         .addReg(ARM::CPSR);
   1697   }
   1698   updateValueMap(I, ResultReg);
   1699   return true;
   1700 }
   1701 
   1702 bool ARMFastISel::SelectDiv(const Instruction *I, bool isSigned) {
   1703   MVT VT;
   1704   Type *Ty = I->getType();
   1705   if (!isTypeLegal(Ty, VT))
   1706     return false;
   1707 
   1708   // If we have integer div support we should have selected this automagically.
   1709   // In case we have a real miss go ahead and return false and we'll pick
   1710   // it up later.
   1711   if (Subtarget->hasDivide()) return false;
   1712 
   1713   // Otherwise emit a libcall.
   1714   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   1715   if (VT == MVT::i8)
   1716     LC = isSigned ? RTLIB::SDIV_I8 : RTLIB::UDIV_I8;
   1717   else if (VT == MVT::i16)
   1718     LC = isSigned ? RTLIB::SDIV_I16 : RTLIB::UDIV_I16;
   1719   else if (VT == MVT::i32)
   1720     LC = isSigned ? RTLIB::SDIV_I32 : RTLIB::UDIV_I32;
   1721   else if (VT == MVT::i64)
   1722     LC = isSigned ? RTLIB::SDIV_I64 : RTLIB::UDIV_I64;
   1723   else if (VT == MVT::i128)
   1724     LC = isSigned ? RTLIB::SDIV_I128 : RTLIB::UDIV_I128;
   1725   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
   1726 
   1727   return ARMEmitLibcall(I, LC);
   1728 }
   1729 
   1730 bool ARMFastISel::SelectRem(const Instruction *I, bool isSigned) {
   1731   MVT VT;
   1732   Type *Ty = I->getType();
   1733   if (!isTypeLegal(Ty, VT))
   1734     return false;
   1735 
   1736   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   1737   if (VT == MVT::i8)
   1738     LC = isSigned ? RTLIB::SREM_I8 : RTLIB::UREM_I8;
   1739   else if (VT == MVT::i16)
   1740     LC = isSigned ? RTLIB::SREM_I16 : RTLIB::UREM_I16;
   1741   else if (VT == MVT::i32)
   1742     LC = isSigned ? RTLIB::SREM_I32 : RTLIB::UREM_I32;
   1743   else if (VT == MVT::i64)
   1744     LC = isSigned ? RTLIB::SREM_I64 : RTLIB::UREM_I64;
   1745   else if (VT == MVT::i128)
   1746     LC = isSigned ? RTLIB::SREM_I128 : RTLIB::UREM_I128;
   1747   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
   1748 
   1749   return ARMEmitLibcall(I, LC);
   1750 }
   1751 
   1752 bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) {
   1753   EVT DestVT  = TLI.getValueType(I->getType(), true);
   1754 
   1755   // We can get here in the case when we have a binary operation on a non-legal
   1756   // type and the target independent selector doesn't know how to handle it.
   1757   if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
   1758     return false;
   1759 
   1760   unsigned Opc;
   1761   switch (ISDOpcode) {
   1762     default: return false;
   1763     case ISD::ADD:
   1764       Opc = isThumb2 ? ARM::t2ADDrr : ARM::ADDrr;
   1765       break;
   1766     case ISD::OR:
   1767       Opc = isThumb2 ? ARM::t2ORRrr : ARM::ORRrr;
   1768       break;
   1769     case ISD::SUB:
   1770       Opc = isThumb2 ? ARM::t2SUBrr : ARM::SUBrr;
   1771       break;
   1772   }
   1773 
   1774   unsigned SrcReg1 = getRegForValue(I->getOperand(0));
   1775   if (SrcReg1 == 0) return false;
   1776 
   1777   // TODO: Often the 2nd operand is an immediate, which can be encoded directly
   1778   // in the instruction, rather then materializing the value in a register.
   1779   unsigned SrcReg2 = getRegForValue(I->getOperand(1));
   1780   if (SrcReg2 == 0) return false;
   1781 
   1782   unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
   1783   SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1);
   1784   SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2);
   1785   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1786                           TII.get(Opc), ResultReg)
   1787                   .addReg(SrcReg1).addReg(SrcReg2));
   1788   updateValueMap(I, ResultReg);
   1789   return true;
   1790 }
   1791 
   1792 bool ARMFastISel::SelectBinaryFPOp(const Instruction *I, unsigned ISDOpcode) {
   1793   EVT FPVT = TLI.getValueType(I->getType(), true);
   1794   if (!FPVT.isSimple()) return false;
   1795   MVT VT = FPVT.getSimpleVT();
   1796 
   1797   // We can get here in the case when we want to use NEON for our fp
   1798   // operations, but can't figure out how to. Just use the vfp instructions
   1799   // if we have them.
   1800   // FIXME: It'd be nice to use NEON instructions.
   1801   Type *Ty = I->getType();
   1802   bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
   1803   if (isFloat && !Subtarget->hasVFP2())
   1804     return false;
   1805 
   1806   unsigned Opc;
   1807   bool is64bit = VT == MVT::f64 || VT == MVT::i64;
   1808   switch (ISDOpcode) {
   1809     default: return false;
   1810     case ISD::FADD:
   1811       Opc = is64bit ? ARM::VADDD : ARM::VADDS;
   1812       break;
   1813     case ISD::FSUB:
   1814       Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
   1815       break;
   1816     case ISD::FMUL:
   1817       Opc = is64bit ? ARM::VMULD : ARM::VMULS;
   1818       break;
   1819   }
   1820   unsigned Op1 = getRegForValue(I->getOperand(0));
   1821   if (Op1 == 0) return false;
   1822 
   1823   unsigned Op2 = getRegForValue(I->getOperand(1));
   1824   if (Op2 == 0) return false;
   1825 
   1826   unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT.SimpleTy));
   1827   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1828                           TII.get(Opc), ResultReg)
   1829                   .addReg(Op1).addReg(Op2));
   1830   updateValueMap(I, ResultReg);
   1831   return true;
   1832 }
   1833 
   1834 // Call Handling Code
   1835 
   1836 // This is largely taken directly from CCAssignFnForNode
   1837 // TODO: We may not support all of this.
   1838 CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
   1839                                            bool Return,
   1840                                            bool isVarArg) {
   1841   switch (CC) {
   1842   default:
   1843     llvm_unreachable("Unsupported calling convention");
   1844   case CallingConv::Fast:
   1845     if (Subtarget->hasVFP2() && !isVarArg) {
   1846       if (!Subtarget->isAAPCS_ABI())
   1847         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
   1848       // For AAPCS ABI targets, just use VFP variant of the calling convention.
   1849       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1850     }
   1851     // Fallthrough
   1852   case CallingConv::C:
   1853     // Use target triple & subtarget features to do actual dispatch.
   1854     if (Subtarget->isAAPCS_ABI()) {
   1855       if (Subtarget->hasVFP2() &&
   1856           TM.Options.FloatABIType == FloatABI::Hard && !isVarArg)
   1857         return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
   1858       else
   1859         return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
   1860     } else
   1861         return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   1862   case CallingConv::ARM_AAPCS_VFP:
   1863     if (!isVarArg)
   1864       return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
   1865     // Fall through to soft float variant, variadic functions don't
   1866     // use hard floating point ABI.
   1867   case CallingConv::ARM_AAPCS:
   1868     return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
   1869   case CallingConv::ARM_APCS:
   1870     return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
   1871   case CallingConv::GHC:
   1872     if (Return)
   1873       llvm_unreachable("Can't return in GHC call convention");
   1874     else
   1875       return CC_ARM_APCS_GHC;
   1876   }
   1877 }
   1878 
   1879 bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
   1880                                   SmallVectorImpl<unsigned> &ArgRegs,
   1881                                   SmallVectorImpl<MVT> &ArgVTs,
   1882                                   SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
   1883                                   SmallVectorImpl<unsigned> &RegArgs,
   1884                                   CallingConv::ID CC,
   1885                                   unsigned &NumBytes,
   1886                                   bool isVarArg) {
   1887   SmallVector<CCValAssign, 16> ArgLocs;
   1888   CCState CCInfo(CC, isVarArg, *FuncInfo.MF, ArgLocs, *Context);
   1889   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags,
   1890                              CCAssignFnForCall(CC, false, isVarArg));
   1891 
   1892   // Check that we can handle all of the arguments. If we can't, then bail out
   1893   // now before we add code to the MBB.
   1894   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1895     CCValAssign &VA = ArgLocs[i];
   1896     MVT ArgVT = ArgVTs[VA.getValNo()];
   1897 
   1898     // We don't handle NEON/vector parameters yet.
   1899     if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
   1900       return false;
   1901 
   1902     // Now copy/store arg to correct locations.
   1903     if (VA.isRegLoc() && !VA.needsCustom()) {
   1904       continue;
   1905     } else if (VA.needsCustom()) {
   1906       // TODO: We need custom lowering for vector (v2f64) args.
   1907       if (VA.getLocVT() != MVT::f64 ||
   1908           // TODO: Only handle register args for now.
   1909           !VA.isRegLoc() || !ArgLocs[++i].isRegLoc())
   1910         return false;
   1911     } else {
   1912       switch (ArgVT.SimpleTy) {
   1913       default:
   1914         return false;
   1915       case MVT::i1:
   1916       case MVT::i8:
   1917       case MVT::i16:
   1918       case MVT::i32:
   1919         break;
   1920       case MVT::f32:
   1921         if (!Subtarget->hasVFP2())
   1922           return false;
   1923         break;
   1924       case MVT::f64:
   1925         if (!Subtarget->hasVFP2())
   1926           return false;
   1927         break;
   1928       }
   1929     }
   1930   }
   1931 
   1932   // At the point, we are able to handle the call's arguments in fast isel.
   1933 
   1934   // Get a count of how many bytes are to be pushed on the stack.
   1935   NumBytes = CCInfo.getNextStackOffset();
   1936 
   1937   // Issue CALLSEQ_START
   1938   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   1939   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1940                           TII.get(AdjStackDown))
   1941                   .addImm(NumBytes));
   1942 
   1943   // Process the args.
   1944   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1945     CCValAssign &VA = ArgLocs[i];
   1946     const Value *ArgVal = Args[VA.getValNo()];
   1947     unsigned Arg = ArgRegs[VA.getValNo()];
   1948     MVT ArgVT = ArgVTs[VA.getValNo()];
   1949 
   1950     assert((!ArgVT.isVector() && ArgVT.getSizeInBits() <= 64) &&
   1951            "We don't handle NEON/vector parameters yet.");
   1952 
   1953     // Handle arg promotion, etc.
   1954     switch (VA.getLocInfo()) {
   1955       case CCValAssign::Full: break;
   1956       case CCValAssign::SExt: {
   1957         MVT DestVT = VA.getLocVT();
   1958         Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/false);
   1959         assert (Arg != 0 && "Failed to emit a sext");
   1960         ArgVT = DestVT;
   1961         break;
   1962       }
   1963       case CCValAssign::AExt:
   1964         // Intentional fall-through.  Handle AExt and ZExt.
   1965       case CCValAssign::ZExt: {
   1966         MVT DestVT = VA.getLocVT();
   1967         Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true);
   1968         assert (Arg != 0 && "Failed to emit a zext");
   1969         ArgVT = DestVT;
   1970         break;
   1971       }
   1972       case CCValAssign::BCvt: {
   1973         unsigned BC = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
   1974                                  /*TODO: Kill=*/false);
   1975         assert(BC != 0 && "Failed to emit a bitcast!");
   1976         Arg = BC;
   1977         ArgVT = VA.getLocVT();
   1978         break;
   1979       }
   1980       default: llvm_unreachable("Unknown arg promotion!");
   1981     }
   1982 
   1983     // Now copy/store arg to correct locations.
   1984     if (VA.isRegLoc() && !VA.needsCustom()) {
   1985       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1986               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(Arg);
   1987       RegArgs.push_back(VA.getLocReg());
   1988     } else if (VA.needsCustom()) {
   1989       // TODO: We need custom lowering for vector (v2f64) args.
   1990       assert(VA.getLocVT() == MVT::f64 &&
   1991              "Custom lowering for v2f64 args not available");
   1992 
   1993       CCValAssign &NextVA = ArgLocs[++i];
   1994 
   1995       assert(VA.isRegLoc() && NextVA.isRegLoc() &&
   1996              "We only handle register args!");
   1997 
   1998       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1999                               TII.get(ARM::VMOVRRD), VA.getLocReg())
   2000                       .addReg(NextVA.getLocReg(), RegState::Define)
   2001                       .addReg(Arg));
   2002       RegArgs.push_back(VA.getLocReg());
   2003       RegArgs.push_back(NextVA.getLocReg());
   2004     } else {
   2005       assert(VA.isMemLoc());
   2006       // Need to store on the stack.
   2007 
   2008       // Don't emit stores for undef values.
   2009       if (isa<UndefValue>(ArgVal))
   2010         continue;
   2011 
   2012       Address Addr;
   2013       Addr.BaseType = Address::RegBase;
   2014       Addr.Base.Reg = ARM::SP;
   2015       Addr.Offset = VA.getLocMemOffset();
   2016 
   2017       bool EmitRet = ARMEmitStore(ArgVT, Arg, Addr); (void)EmitRet;
   2018       assert(EmitRet && "Could not emit a store for argument!");
   2019     }
   2020   }
   2021 
   2022   return true;
   2023 }
   2024 
   2025 bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
   2026                              const Instruction *I, CallingConv::ID CC,
   2027                              unsigned &NumBytes, bool isVarArg) {
   2028   // Issue CALLSEQ_END
   2029   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   2030   AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2031                           TII.get(AdjStackUp))
   2032                   .addImm(NumBytes).addImm(0));
   2033 
   2034   // Now the return value.
   2035   if (RetVT != MVT::isVoid) {
   2036     SmallVector<CCValAssign, 16> RVLocs;
   2037     CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
   2038     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
   2039 
   2040     // Copy all of the result registers out of their specified physreg.
   2041     if (RVLocs.size() == 2 && RetVT == MVT::f64) {
   2042       // For this move we copy into two registers and then move into the
   2043       // double fp reg we want.
   2044       MVT DestVT = RVLocs[0].getValVT();
   2045       const TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
   2046       unsigned ResultReg = createResultReg(DstRC);
   2047       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2048                               TII.get(ARM::VMOVDRR), ResultReg)
   2049                       .addReg(RVLocs[0].getLocReg())
   2050                       .addReg(RVLocs[1].getLocReg()));
   2051 
   2052       UsedRegs.push_back(RVLocs[0].getLocReg());
   2053       UsedRegs.push_back(RVLocs[1].getLocReg());
   2054 
   2055       // Finally update the result.
   2056       updateValueMap(I, ResultReg);
   2057     } else {
   2058       assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!");
   2059       MVT CopyVT = RVLocs[0].getValVT();
   2060 
   2061       // Special handling for extended integers.
   2062       if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16)
   2063         CopyVT = MVT::i32;
   2064 
   2065       const TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
   2066 
   2067       unsigned ResultReg = createResultReg(DstRC);
   2068       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2069               TII.get(TargetOpcode::COPY),
   2070               ResultReg).addReg(RVLocs[0].getLocReg());
   2071       UsedRegs.push_back(RVLocs[0].getLocReg());
   2072 
   2073       // Finally update the result.
   2074       updateValueMap(I, ResultReg);
   2075     }
   2076   }
   2077 
   2078   return true;
   2079 }
   2080 
   2081 bool ARMFastISel::SelectRet(const Instruction *I) {
   2082   const ReturnInst *Ret = cast<ReturnInst>(I);
   2083   const Function &F = *I->getParent()->getParent();
   2084 
   2085   if (!FuncInfo.CanLowerReturn)
   2086     return false;
   2087 
   2088   // Build a list of return value registers.
   2089   SmallVector<unsigned, 4> RetRegs;
   2090 
   2091   CallingConv::ID CC = F.getCallingConv();
   2092   if (Ret->getNumOperands() > 0) {
   2093     SmallVector<ISD::OutputArg, 4> Outs;
   2094     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
   2095 
   2096     // Analyze operands of the call, assigning locations to each operand.
   2097     SmallVector<CCValAssign, 16> ValLocs;
   2098     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
   2099     CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */,
   2100                                                  F.isVarArg()));
   2101 
   2102     const Value *RV = Ret->getOperand(0);
   2103     unsigned Reg = getRegForValue(RV);
   2104     if (Reg == 0)
   2105       return false;
   2106 
   2107     // Only handle a single return value for now.
   2108     if (ValLocs.size() != 1)
   2109       return false;
   2110 
   2111     CCValAssign &VA = ValLocs[0];
   2112 
   2113     // Don't bother handling odd stuff for now.
   2114     if (VA.getLocInfo() != CCValAssign::Full)
   2115       return false;
   2116     // Only handle register returns for now.
   2117     if (!VA.isRegLoc())
   2118       return false;
   2119 
   2120     unsigned SrcReg = Reg + VA.getValNo();
   2121     EVT RVEVT = TLI.getValueType(RV->getType());
   2122     if (!RVEVT.isSimple()) return false;
   2123     MVT RVVT = RVEVT.getSimpleVT();
   2124     MVT DestVT = VA.getValVT();
   2125     // Special handling for extended integers.
   2126     if (RVVT != DestVT) {
   2127       if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16)
   2128         return false;
   2129 
   2130       assert(DestVT == MVT::i32 && "ARM should always ext to i32");
   2131 
   2132       // Perform extension if flagged as either zext or sext.  Otherwise, do
   2133       // nothing.
   2134       if (Outs[0].Flags.isZExt() || Outs[0].Flags.isSExt()) {
   2135         SrcReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, Outs[0].Flags.isZExt());
   2136         if (SrcReg == 0) return false;
   2137       }
   2138     }
   2139 
   2140     // Make the copy.
   2141     unsigned DstReg = VA.getLocReg();
   2142     const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
   2143     // Avoid a cross-class copy. This is very unlikely.
   2144     if (!SrcRC->contains(DstReg))
   2145       return false;
   2146     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2147             TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
   2148 
   2149     // Add register to return instruction.
   2150     RetRegs.push_back(VA.getLocReg());
   2151   }
   2152 
   2153   unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET;
   2154   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2155                                     TII.get(RetOpc));
   2156   AddOptionalDefs(MIB);
   2157   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
   2158     MIB.addReg(RetRegs[i], RegState::Implicit);
   2159   return true;
   2160 }
   2161 
   2162 unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) {
   2163   if (UseReg)
   2164     return isThumb2 ? ARM::tBLXr : ARM::BLX;
   2165   else
   2166     return isThumb2 ? ARM::tBL : ARM::BL;
   2167 }
   2168 
   2169 unsigned ARMFastISel::getLibcallReg(const Twine &Name) {
   2170   // Manually compute the global's type to avoid building it when unnecessary.
   2171   Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0);
   2172   EVT LCREVT = TLI.getValueType(GVTy);
   2173   if (!LCREVT.isSimple()) return 0;
   2174 
   2175   GlobalValue *GV = new GlobalVariable(M, Type::getInt32Ty(*Context), false,
   2176                                        GlobalValue::ExternalLinkage, nullptr,
   2177                                        Name);
   2178   assert(GV->getType() == GVTy && "We miscomputed the type for the global!");
   2179   return ARMMaterializeGV(GV, LCREVT.getSimpleVT());
   2180 }
   2181 
   2182 // A quick function that will emit a call for a named libcall in F with the
   2183 // vector of passed arguments for the Instruction in I. We can assume that we
   2184 // can emit a call for any libcall we can produce. This is an abridged version
   2185 // of the full call infrastructure since we won't need to worry about things
   2186 // like computed function pointers or strange arguments at call sites.
   2187 // TODO: Try to unify this and the normal call bits for ARM, then try to unify
   2188 // with X86.
   2189 bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
   2190   CallingConv::ID CC = TLI.getLibcallCallingConv(Call);
   2191 
   2192   // Handle *simple* calls for now.
   2193   Type *RetTy = I->getType();
   2194   MVT RetVT;
   2195   if (RetTy->isVoidTy())
   2196     RetVT = MVT::isVoid;
   2197   else if (!isTypeLegal(RetTy, RetVT))
   2198     return false;
   2199 
   2200   // Can't handle non-double multi-reg retvals.
   2201   if (RetVT != MVT::isVoid && RetVT != MVT::i32) {
   2202     SmallVector<CCValAssign, 16> RVLocs;
   2203     CCState CCInfo(CC, false, *FuncInfo.MF, RVLocs, *Context);
   2204     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, false));
   2205     if (RVLocs.size() >= 2 && RetVT != MVT::f64)
   2206       return false;
   2207   }
   2208 
   2209   // Set up the argument vectors.
   2210   SmallVector<Value*, 8> Args;
   2211   SmallVector<unsigned, 8> ArgRegs;
   2212   SmallVector<MVT, 8> ArgVTs;
   2213   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   2214   Args.reserve(I->getNumOperands());
   2215   ArgRegs.reserve(I->getNumOperands());
   2216   ArgVTs.reserve(I->getNumOperands());
   2217   ArgFlags.reserve(I->getNumOperands());
   2218   for (unsigned i = 0; i < I->getNumOperands(); ++i) {
   2219     Value *Op = I->getOperand(i);
   2220     unsigned Arg = getRegForValue(Op);
   2221     if (Arg == 0) return false;
   2222 
   2223     Type *ArgTy = Op->getType();
   2224     MVT ArgVT;
   2225     if (!isTypeLegal(ArgTy, ArgVT)) return false;
   2226 
   2227     ISD::ArgFlagsTy Flags;
   2228     unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
   2229     Flags.setOrigAlign(OriginalAlignment);
   2230 
   2231     Args.push_back(Op);
   2232     ArgRegs.push_back(Arg);
   2233     ArgVTs.push_back(ArgVT);
   2234     ArgFlags.push_back(Flags);
   2235   }
   2236 
   2237   // Handle the arguments now that we've gotten them.
   2238   SmallVector<unsigned, 4> RegArgs;
   2239   unsigned NumBytes;
   2240   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
   2241                        RegArgs, CC, NumBytes, false))
   2242     return false;
   2243 
   2244   unsigned CalleeReg = 0;
   2245   if (EnableARMLongCalls) {
   2246     CalleeReg = getLibcallReg(TLI.getLibcallName(Call));
   2247     if (CalleeReg == 0) return false;
   2248   }
   2249 
   2250   // Issue the call.
   2251   unsigned CallOpc = ARMSelectCallOp(EnableARMLongCalls);
   2252   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2253                                     DbgLoc, TII.get(CallOpc));
   2254   // BL / BLX don't take a predicate, but tBL / tBLX do.
   2255   if (isThumb2)
   2256     AddDefaultPred(MIB);
   2257   if (EnableARMLongCalls)
   2258     MIB.addReg(CalleeReg);
   2259   else
   2260     MIB.addExternalSymbol(TLI.getLibcallName(Call));
   2261 
   2262   // Add implicit physical register uses to the call.
   2263   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
   2264     MIB.addReg(RegArgs[i], RegState::Implicit);
   2265 
   2266   // Add a register mask with the call-preserved registers.
   2267   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   2268   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
   2269 
   2270   // Finish off the call including any return values.
   2271   SmallVector<unsigned, 4> UsedRegs;
   2272   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, false)) return false;
   2273 
   2274   // Set all unused physreg defs as dead.
   2275   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
   2276 
   2277   return true;
   2278 }
   2279 
   2280 bool ARMFastISel::SelectCall(const Instruction *I,
   2281                              const char *IntrMemName = nullptr) {
   2282   const CallInst *CI = cast<CallInst>(I);
   2283   const Value *Callee = CI->getCalledValue();
   2284 
   2285   // Can't handle inline asm.
   2286   if (isa<InlineAsm>(Callee)) return false;
   2287 
   2288   // Allow SelectionDAG isel to handle tail calls.
   2289   if (CI->isTailCall()) return false;
   2290 
   2291   // Check the calling convention.
   2292   ImmutableCallSite CS(CI);
   2293   CallingConv::ID CC = CS.getCallingConv();
   2294 
   2295   // TODO: Avoid some calling conventions?
   2296 
   2297   PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   2298   FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   2299   bool isVarArg = FTy->isVarArg();
   2300 
   2301   // Handle *simple* calls for now.
   2302   Type *RetTy = I->getType();
   2303   MVT RetVT;
   2304   if (RetTy->isVoidTy())
   2305     RetVT = MVT::isVoid;
   2306   else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 &&
   2307            RetVT != MVT::i8  && RetVT != MVT::i1)
   2308     return false;
   2309 
   2310   // Can't handle non-double multi-reg retvals.
   2311   if (RetVT != MVT::isVoid && RetVT != MVT::i1 && RetVT != MVT::i8 &&
   2312       RetVT != MVT::i16 && RetVT != MVT::i32) {
   2313     SmallVector<CCValAssign, 16> RVLocs;
   2314     CCState CCInfo(CC, isVarArg, *FuncInfo.MF, RVLocs, *Context);
   2315     CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true, isVarArg));
   2316     if (RVLocs.size() >= 2 && RetVT != MVT::f64)
   2317       return false;
   2318   }
   2319 
   2320   // Set up the argument vectors.
   2321   SmallVector<Value*, 8> Args;
   2322   SmallVector<unsigned, 8> ArgRegs;
   2323   SmallVector<MVT, 8> ArgVTs;
   2324   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   2325   unsigned arg_size = CS.arg_size();
   2326   Args.reserve(arg_size);
   2327   ArgRegs.reserve(arg_size);
   2328   ArgVTs.reserve(arg_size);
   2329   ArgFlags.reserve(arg_size);
   2330   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
   2331        i != e; ++i) {
   2332     // If we're lowering a memory intrinsic instead of a regular call, skip the
   2333     // last two arguments, which shouldn't be passed to the underlying function.
   2334     if (IntrMemName && e-i <= 2)
   2335       break;
   2336 
   2337     ISD::ArgFlagsTy Flags;
   2338     unsigned AttrInd = i - CS.arg_begin() + 1;
   2339     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
   2340       Flags.setSExt();
   2341     if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
   2342       Flags.setZExt();
   2343 
   2344     // FIXME: Only handle *easy* calls for now.
   2345     if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
   2346         CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
   2347         CS.paramHasAttr(AttrInd, Attribute::Nest) ||
   2348         CS.paramHasAttr(AttrInd, Attribute::ByVal))
   2349       return false;
   2350 
   2351     Type *ArgTy = (*i)->getType();
   2352     MVT ArgVT;
   2353     if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 &&
   2354         ArgVT != MVT::i1)
   2355       return false;
   2356 
   2357     unsigned Arg = getRegForValue(*i);
   2358     if (Arg == 0)
   2359       return false;
   2360 
   2361     unsigned OriginalAlignment = DL.getABITypeAlignment(ArgTy);
   2362     Flags.setOrigAlign(OriginalAlignment);
   2363 
   2364     Args.push_back(*i);
   2365     ArgRegs.push_back(Arg);
   2366     ArgVTs.push_back(ArgVT);
   2367     ArgFlags.push_back(Flags);
   2368   }
   2369 
   2370   // Handle the arguments now that we've gotten them.
   2371   SmallVector<unsigned, 4> RegArgs;
   2372   unsigned NumBytes;
   2373   if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags,
   2374                        RegArgs, CC, NumBytes, isVarArg))
   2375     return false;
   2376 
   2377   bool UseReg = false;
   2378   const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
   2379   if (!GV || EnableARMLongCalls) UseReg = true;
   2380 
   2381   unsigned CalleeReg = 0;
   2382   if (UseReg) {
   2383     if (IntrMemName)
   2384       CalleeReg = getLibcallReg(IntrMemName);
   2385     else
   2386       CalleeReg = getRegForValue(Callee);
   2387 
   2388     if (CalleeReg == 0) return false;
   2389   }
   2390 
   2391   // Issue the call.
   2392   unsigned CallOpc = ARMSelectCallOp(UseReg);
   2393   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2394                                     DbgLoc, TII.get(CallOpc));
   2395 
   2396   unsigned char OpFlags = 0;
   2397 
   2398   // Add MO_PLT for global address or external symbol in the PIC relocation
   2399   // model.
   2400   if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_)
   2401     OpFlags = ARMII::MO_PLT;
   2402 
   2403   // ARM calls don't take a predicate, but tBL / tBLX do.
   2404   if(isThumb2)
   2405     AddDefaultPred(MIB);
   2406   if (UseReg)
   2407     MIB.addReg(CalleeReg);
   2408   else if (!IntrMemName)
   2409     MIB.addGlobalAddress(GV, 0, OpFlags);
   2410   else
   2411     MIB.addExternalSymbol(IntrMemName, OpFlags);
   2412 
   2413   // Add implicit physical register uses to the call.
   2414   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
   2415     MIB.addReg(RegArgs[i], RegState::Implicit);
   2416 
   2417   // Add a register mask with the call-preserved registers.
   2418   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   2419   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
   2420 
   2421   // Finish off the call including any return values.
   2422   SmallVector<unsigned, 4> UsedRegs;
   2423   if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes, isVarArg))
   2424     return false;
   2425 
   2426   // Set all unused physreg defs as dead.
   2427   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
   2428 
   2429   return true;
   2430 }
   2431 
   2432 bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) {
   2433   return Len <= 16;
   2434 }
   2435 
   2436 bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src,
   2437                                         uint64_t Len, unsigned Alignment) {
   2438   // Make sure we don't bloat code by inlining very large memcpy's.
   2439   if (!ARMIsMemCpySmall(Len))
   2440     return false;
   2441 
   2442   while (Len) {
   2443     MVT VT;
   2444     if (!Alignment || Alignment >= 4) {
   2445       if (Len >= 4)
   2446         VT = MVT::i32;
   2447       else if (Len >= 2)
   2448         VT = MVT::i16;
   2449       else {
   2450         assert (Len == 1 && "Expected a length of 1!");
   2451         VT = MVT::i8;
   2452       }
   2453     } else {
   2454       // Bound based on alignment.
   2455       if (Len >= 2 && Alignment == 2)
   2456         VT = MVT::i16;
   2457       else {
   2458         VT = MVT::i8;
   2459       }
   2460     }
   2461 
   2462     bool RV;
   2463     unsigned ResultReg;
   2464     RV = ARMEmitLoad(VT, ResultReg, Src);
   2465     assert (RV == true && "Should be able to handle this load.");
   2466     RV = ARMEmitStore(VT, ResultReg, Dest);
   2467     assert (RV == true && "Should be able to handle this store.");
   2468     (void)RV;
   2469 
   2470     unsigned Size = VT.getSizeInBits()/8;
   2471     Len -= Size;
   2472     Dest.Offset += Size;
   2473     Src.Offset += Size;
   2474   }
   2475 
   2476   return true;
   2477 }
   2478 
   2479 bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
   2480   // FIXME: Handle more intrinsics.
   2481   switch (I.getIntrinsicID()) {
   2482   default: return false;
   2483   case Intrinsic::frameaddress: {
   2484     MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
   2485     MFI->setFrameAddressIsTaken(true);
   2486 
   2487     unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
   2488     const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
   2489                                              : &ARM::GPRRegClass;
   2490 
   2491     const ARMBaseRegisterInfo *RegInfo =
   2492         static_cast<const ARMBaseRegisterInfo *>(Subtarget->getRegisterInfo());
   2493     unsigned FramePtr = RegInfo->getFrameRegister(*(FuncInfo.MF));
   2494     unsigned SrcReg = FramePtr;
   2495 
   2496     // Recursively load frame address
   2497     // ldr r0 [fp]
   2498     // ldr r0 [r0]
   2499     // ldr r0 [r0]
   2500     // ...
   2501     unsigned DestReg;
   2502     unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue();
   2503     while (Depth--) {
   2504       DestReg = createResultReg(RC);
   2505       AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2506                               TII.get(LdrOpc), DestReg)
   2507                       .addReg(SrcReg).addImm(0));
   2508       SrcReg = DestReg;
   2509     }
   2510     updateValueMap(&I, SrcReg);
   2511     return true;
   2512   }
   2513   case Intrinsic::memcpy:
   2514   case Intrinsic::memmove: {
   2515     const MemTransferInst &MTI = cast<MemTransferInst>(I);
   2516     // Don't handle volatile.
   2517     if (MTI.isVolatile())
   2518       return false;
   2519 
   2520     // Disable inlining for memmove before calls to ComputeAddress.  Otherwise,
   2521     // we would emit dead code because we don't currently handle memmoves.
   2522     bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy);
   2523     if (isa<ConstantInt>(MTI.getLength()) && isMemCpy) {
   2524       // Small memcpy's are common enough that we want to do them without a call
   2525       // if possible.
   2526       uint64_t Len = cast<ConstantInt>(MTI.getLength())->getZExtValue();
   2527       if (ARMIsMemCpySmall(Len)) {
   2528         Address Dest, Src;
   2529         if (!ARMComputeAddress(MTI.getRawDest(), Dest) ||
   2530             !ARMComputeAddress(MTI.getRawSource(), Src))
   2531           return false;
   2532         unsigned Alignment = MTI.getAlignment();
   2533         if (ARMTryEmitSmallMemCpy(Dest, Src, Len, Alignment))
   2534           return true;
   2535       }
   2536     }
   2537 
   2538     if (!MTI.getLength()->getType()->isIntegerTy(32))
   2539       return false;
   2540 
   2541     if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255)
   2542       return false;
   2543 
   2544     const char *IntrMemName = isa<MemCpyInst>(I) ? "memcpy" : "memmove";
   2545     return SelectCall(&I, IntrMemName);
   2546   }
   2547   case Intrinsic::memset: {
   2548     const MemSetInst &MSI = cast<MemSetInst>(I);
   2549     // Don't handle volatile.
   2550     if (MSI.isVolatile())
   2551       return false;
   2552 
   2553     if (!MSI.getLength()->getType()->isIntegerTy(32))
   2554       return false;
   2555 
   2556     if (MSI.getDestAddressSpace() > 255)
   2557       return false;
   2558 
   2559     return SelectCall(&I, "memset");
   2560   }
   2561   case Intrinsic::trap: {
   2562     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(
   2563       Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP));
   2564     return true;
   2565   }
   2566   }
   2567 }
   2568 
   2569 bool ARMFastISel::SelectTrunc(const Instruction *I) {
   2570   // The high bits for a type smaller than the register size are assumed to be
   2571   // undefined.
   2572   Value *Op = I->getOperand(0);
   2573 
   2574   EVT SrcVT, DestVT;
   2575   SrcVT = TLI.getValueType(Op->getType(), true);
   2576   DestVT = TLI.getValueType(I->getType(), true);
   2577 
   2578   if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
   2579     return false;
   2580   if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
   2581     return false;
   2582 
   2583   unsigned SrcReg = getRegForValue(Op);
   2584   if (!SrcReg) return false;
   2585 
   2586   // Because the high bits are undefined, a truncate doesn't generate
   2587   // any code.
   2588   updateValueMap(I, SrcReg);
   2589   return true;
   2590 }
   2591 
   2592 unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT,
   2593                                     bool isZExt) {
   2594   if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
   2595     return 0;
   2596   if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1)
   2597     return 0;
   2598 
   2599   // Table of which combinations can be emitted as a single instruction,
   2600   // and which will require two.
   2601   static const uint8_t isSingleInstrTbl[3][2][2][2] = {
   2602     //            ARM                     Thumb
   2603     //           !hasV6Ops  hasV6Ops     !hasV6Ops  hasV6Ops
   2604     //    ext:     s  z      s  z          s  z      s  z
   2605     /*  1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } },
   2606     /*  8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } },
   2607     /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }
   2608   };
   2609 
   2610   // Target registers for:
   2611   //  - For ARM can never be PC.
   2612   //  - For 16-bit Thumb are restricted to lower 8 registers.
   2613   //  - For 32-bit Thumb are restricted to non-SP and non-PC.
   2614   static const TargetRegisterClass *RCTbl[2][2] = {
   2615     // Instructions: Two                     Single
   2616     /* ARM      */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass },
   2617     /* Thumb    */ { &ARM::tGPRRegClass,    &ARM::rGPRRegClass    }
   2618   };
   2619 
   2620   // Table governing the instruction(s) to be emitted.
   2621   static const struct InstructionTable {
   2622     uint32_t Opc   : 16;
   2623     uint32_t hasS  :  1; // Some instructions have an S bit, always set it to 0.
   2624     uint32_t Shift :  7; // For shift operand addressing mode, used by MOVsi.
   2625     uint32_t Imm   :  8; // All instructions have either a shift or a mask.
   2626   } IT[2][2][3][2] = {
   2627     { // Two instructions (first is left shift, second is in this table).
   2628       { // ARM                Opc           S  Shift             Imm
   2629         /*  1 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  31 },
   2630         /*  1 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  31 } },
   2631         /*  8 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  24 },
   2632         /*  8 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  24 } },
   2633         /* 16 bit sext */ { { ARM::MOVsi  , 1, ARM_AM::asr     ,  16 },
   2634         /* 16 bit zext */   { ARM::MOVsi  , 1, ARM_AM::lsr     ,  16 } }
   2635       },
   2636       { // Thumb              Opc           S  Shift             Imm
   2637         /*  1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  31 },
   2638         /*  1 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  31 } },
   2639         /*  8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  24 },
   2640         /*  8 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  24 } },
   2641         /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift,  16 },
   2642         /* 16 bit zext */   { ARM::tLSRri , 0, ARM_AM::no_shift,  16 } }
   2643       }
   2644     },
   2645     { // Single instruction.
   2646       { // ARM                Opc           S  Shift             Imm
   2647         /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
   2648         /*  1 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift,   1 } },
   2649         /*  8 bit sext */ { { ARM::SXTB   , 0, ARM_AM::no_shift,   0 },
   2650         /*  8 bit zext */   { ARM::ANDri  , 1, ARM_AM::no_shift, 255 } },
   2651         /* 16 bit sext */ { { ARM::SXTH   , 0, ARM_AM::no_shift,   0 },
   2652         /* 16 bit zext */   { ARM::UXTH   , 0, ARM_AM::no_shift,   0 } }
   2653       },
   2654       { // Thumb              Opc           S  Shift             Imm
   2655         /*  1 bit sext */ { { ARM::KILL   , 0, ARM_AM::no_shift,   0 },
   2656         /*  1 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift,   1 } },
   2657         /*  8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift,   0 },
   2658         /*  8 bit zext */   { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } },
   2659         /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift,   0 },
   2660         /* 16 bit zext */   { ARM::t2UXTH , 0, ARM_AM::no_shift,   0 } }
   2661       }
   2662     }
   2663   };
   2664 
   2665   unsigned SrcBits = SrcVT.getSizeInBits();
   2666   unsigned DestBits = DestVT.getSizeInBits();
   2667   (void) DestBits;
   2668   assert((SrcBits < DestBits) && "can only extend to larger types");
   2669   assert((DestBits == 32 || DestBits == 16 || DestBits == 8) &&
   2670          "other sizes unimplemented");
   2671   assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) &&
   2672          "other sizes unimplemented");
   2673 
   2674   bool hasV6Ops = Subtarget->hasV6Ops();
   2675   unsigned Bitness = SrcBits / 8;  // {1,8,16}=>{0,1,2}
   2676   assert((Bitness < 3) && "sanity-check table bounds");
   2677 
   2678   bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt];
   2679   const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr];
   2680   const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt];
   2681   unsigned Opc = ITP->Opc;
   2682   assert(ARM::KILL != Opc && "Invalid table entry");
   2683   unsigned hasS = ITP->hasS;
   2684   ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift;
   2685   assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) &&
   2686          "only MOVsi has shift operand addressing mode");
   2687   unsigned Imm = ITP->Imm;
   2688 
   2689   // 16-bit Thumb instructions always set CPSR (unless they're in an IT block).
   2690   bool setsCPSR = &ARM::tGPRRegClass == RC;
   2691   unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi;
   2692   unsigned ResultReg;
   2693   // MOVsi encodes shift and immediate in shift operand addressing mode.
   2694   // The following condition has the same value when emitting two
   2695   // instruction sequences: both are shifts.
   2696   bool ImmIsSO = (Shift != ARM_AM::no_shift);
   2697 
   2698   // Either one or two instructions are emitted.
   2699   // They're always of the form:
   2700   //   dst = in OP imm
   2701   // CPSR is set only by 16-bit Thumb instructions.
   2702   // Predicate, if any, is AL.
   2703   // S bit, if available, is always 0.
   2704   // When two are emitted the first's result will feed as the second's input,
   2705   // that value is then dead.
   2706   unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2;
   2707   for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) {
   2708     ResultReg = createResultReg(RC);
   2709     bool isLsl = (0 == Instr) && !isSingleInstr;
   2710     unsigned Opcode = isLsl ? LSLOpc : Opc;
   2711     ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift;
   2712     unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm;
   2713     bool isKill = 1 == Instr;
   2714     MachineInstrBuilder MIB = BuildMI(
   2715         *FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opcode), ResultReg);
   2716     if (setsCPSR)
   2717       MIB.addReg(ARM::CPSR, RegState::Define);
   2718     SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR);
   2719     AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc));
   2720     if (hasS)
   2721       AddDefaultCC(MIB);
   2722     // Second instruction consumes the first's result.
   2723     SrcReg = ResultReg;
   2724   }
   2725 
   2726   return ResultReg;
   2727 }
   2728 
   2729 bool ARMFastISel::SelectIntExt(const Instruction *I) {
   2730   // On ARM, in general, integer casts don't involve legal types; this code
   2731   // handles promotable integers.
   2732   Type *DestTy = I->getType();
   2733   Value *Src = I->getOperand(0);
   2734   Type *SrcTy = Src->getType();
   2735 
   2736   bool isZExt = isa<ZExtInst>(I);
   2737   unsigned SrcReg = getRegForValue(Src);
   2738   if (!SrcReg) return false;
   2739 
   2740   EVT SrcEVT, DestEVT;
   2741   SrcEVT = TLI.getValueType(SrcTy, true);
   2742   DestEVT = TLI.getValueType(DestTy, true);
   2743   if (!SrcEVT.isSimple()) return false;
   2744   if (!DestEVT.isSimple()) return false;
   2745 
   2746   MVT SrcVT = SrcEVT.getSimpleVT();
   2747   MVT DestVT = DestEVT.getSimpleVT();
   2748   unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt);
   2749   if (ResultReg == 0) return false;
   2750   updateValueMap(I, ResultReg);
   2751   return true;
   2752 }
   2753 
   2754 bool ARMFastISel::SelectShift(const Instruction *I,
   2755                               ARM_AM::ShiftOpc ShiftTy) {
   2756   // We handle thumb2 mode by target independent selector
   2757   // or SelectionDAG ISel.
   2758   if (isThumb2)
   2759     return false;
   2760 
   2761   // Only handle i32 now.
   2762   EVT DestVT = TLI.getValueType(I->getType(), true);
   2763   if (DestVT != MVT::i32)
   2764     return false;
   2765 
   2766   unsigned Opc = ARM::MOVsr;
   2767   unsigned ShiftImm;
   2768   Value *Src2Value = I->getOperand(1);
   2769   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Src2Value)) {
   2770     ShiftImm = CI->getZExtValue();
   2771 
   2772     // Fall back to selection DAG isel if the shift amount
   2773     // is zero or greater than the width of the value type.
   2774     if (ShiftImm == 0 || ShiftImm >=32)
   2775       return false;
   2776 
   2777     Opc = ARM::MOVsi;
   2778   }
   2779 
   2780   Value *Src1Value = I->getOperand(0);
   2781   unsigned Reg1 = getRegForValue(Src1Value);
   2782   if (Reg1 == 0) return false;
   2783 
   2784   unsigned Reg2 = 0;
   2785   if (Opc == ARM::MOVsr) {
   2786     Reg2 = getRegForValue(Src2Value);
   2787     if (Reg2 == 0) return false;
   2788   }
   2789 
   2790   unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass);
   2791   if(ResultReg == 0) return false;
   2792 
   2793   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2794                                     TII.get(Opc), ResultReg)
   2795                             .addReg(Reg1);
   2796 
   2797   if (Opc == ARM::MOVsi)
   2798     MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, ShiftImm));
   2799   else if (Opc == ARM::MOVsr) {
   2800     MIB.addReg(Reg2);
   2801     MIB.addImm(ARM_AM::getSORegOpc(ShiftTy, 0));
   2802   }
   2803 
   2804   AddOptionalDefs(MIB);
   2805   updateValueMap(I, ResultReg);
   2806   return true;
   2807 }
   2808 
   2809 // TODO: SoftFP support.
   2810 bool ARMFastISel::fastSelectInstruction(const Instruction *I) {
   2811 
   2812   switch (I->getOpcode()) {
   2813     case Instruction::Load:
   2814       return SelectLoad(I);
   2815     case Instruction::Store:
   2816       return SelectStore(I);
   2817     case Instruction::Br:
   2818       return SelectBranch(I);
   2819     case Instruction::IndirectBr:
   2820       return SelectIndirectBr(I);
   2821     case Instruction::ICmp:
   2822     case Instruction::FCmp:
   2823       return SelectCmp(I);
   2824     case Instruction::FPExt:
   2825       return SelectFPExt(I);
   2826     case Instruction::FPTrunc:
   2827       return SelectFPTrunc(I);
   2828     case Instruction::SIToFP:
   2829       return SelectIToFP(I, /*isSigned*/ true);
   2830     case Instruction::UIToFP:
   2831       return SelectIToFP(I, /*isSigned*/ false);
   2832     case Instruction::FPToSI:
   2833       return SelectFPToI(I, /*isSigned*/ true);
   2834     case Instruction::FPToUI:
   2835       return SelectFPToI(I, /*isSigned*/ false);
   2836     case Instruction::Add:
   2837       return SelectBinaryIntOp(I, ISD::ADD);
   2838     case Instruction::Or:
   2839       return SelectBinaryIntOp(I, ISD::OR);
   2840     case Instruction::Sub:
   2841       return SelectBinaryIntOp(I, ISD::SUB);
   2842     case Instruction::FAdd:
   2843       return SelectBinaryFPOp(I, ISD::FADD);
   2844     case Instruction::FSub:
   2845       return SelectBinaryFPOp(I, ISD::FSUB);
   2846     case Instruction::FMul:
   2847       return SelectBinaryFPOp(I, ISD::FMUL);
   2848     case Instruction::SDiv:
   2849       return SelectDiv(I, /*isSigned*/ true);
   2850     case Instruction::UDiv:
   2851       return SelectDiv(I, /*isSigned*/ false);
   2852     case Instruction::SRem:
   2853       return SelectRem(I, /*isSigned*/ true);
   2854     case Instruction::URem:
   2855       return SelectRem(I, /*isSigned*/ false);
   2856     case Instruction::Call:
   2857       if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
   2858         return SelectIntrinsicCall(*II);
   2859       return SelectCall(I);
   2860     case Instruction::Select:
   2861       return SelectSelect(I);
   2862     case Instruction::Ret:
   2863       return SelectRet(I);
   2864     case Instruction::Trunc:
   2865       return SelectTrunc(I);
   2866     case Instruction::ZExt:
   2867     case Instruction::SExt:
   2868       return SelectIntExt(I);
   2869     case Instruction::Shl:
   2870       return SelectShift(I, ARM_AM::lsl);
   2871     case Instruction::LShr:
   2872       return SelectShift(I, ARM_AM::lsr);
   2873     case Instruction::AShr:
   2874       return SelectShift(I, ARM_AM::asr);
   2875     default: break;
   2876   }
   2877   return false;
   2878 }
   2879 
   2880 namespace {
   2881 // This table describes sign- and zero-extend instructions which can be
   2882 // folded into a preceding load. All of these extends have an immediate
   2883 // (sometimes a mask and sometimes a shift) that's applied after
   2884 // extension.
   2885 const struct FoldableLoadExtendsStruct {
   2886   uint16_t Opc[2];  // ARM, Thumb.
   2887   uint8_t ExpectedImm;
   2888   uint8_t isZExt     : 1;
   2889   uint8_t ExpectedVT : 7;
   2890 } FoldableLoadExtends[] = {
   2891   { { ARM::SXTH,  ARM::t2SXTH  },   0, 0, MVT::i16 },
   2892   { { ARM::UXTH,  ARM::t2UXTH  },   0, 1, MVT::i16 },
   2893   { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8  },
   2894   { { ARM::SXTB,  ARM::t2SXTB  },   0, 0, MVT::i8  },
   2895   { { ARM::UXTB,  ARM::t2UXTB  },   0, 1, MVT::i8  }
   2896 };
   2897 }
   2898 
   2899 /// \brief The specified machine instr operand is a vreg, and that
   2900 /// vreg is being provided by the specified load instruction.  If possible,
   2901 /// try to fold the load as an operand to the instruction, returning true if
   2902 /// successful.
   2903 bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   2904                                       const LoadInst *LI) {
   2905   // Verify we have a legal type before going any further.
   2906   MVT VT;
   2907   if (!isLoadTypeLegal(LI->getType(), VT))
   2908     return false;
   2909 
   2910   // Combine load followed by zero- or sign-extend.
   2911   // ldrb r1, [r0]       ldrb r1, [r0]
   2912   // uxtb r2, r1     =>
   2913   // mov  r3, r2         mov  r3, r1
   2914   if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm())
   2915     return false;
   2916   const uint64_t Imm = MI->getOperand(2).getImm();
   2917 
   2918   bool Found = false;
   2919   bool isZExt;
   2920   for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends);
   2921        i != e; ++i) {
   2922     if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() &&
   2923         (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm &&
   2924         MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) {
   2925       Found = true;
   2926       isZExt = FoldableLoadExtends[i].isZExt;
   2927     }
   2928   }
   2929   if (!Found) return false;
   2930 
   2931   // See if we can handle this address.
   2932   Address Addr;
   2933   if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false;
   2934 
   2935   unsigned ResultReg = MI->getOperand(0).getReg();
   2936   if (!ARMEmitLoad(VT, ResultReg, Addr, LI->getAlignment(), isZExt, false))
   2937     return false;
   2938   MI->eraseFromParent();
   2939   return true;
   2940 }
   2941 
   2942 unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV,
   2943                                      unsigned Align, MVT VT) {
   2944   bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
   2945   ARMConstantPoolConstant *CPV =
   2946     ARMConstantPoolConstant::Create(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
   2947   unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);
   2948 
   2949   unsigned Opc;
   2950   unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT));
   2951   // Load value.
   2952   if (isThumb2) {
   2953     DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0);
   2954     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2955                             TII.get(ARM::t2LDRpci), DestReg1)
   2956                     .addConstantPoolIndex(Idx));
   2957     Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs;
   2958   } else {
   2959     // The extra immediate is for addrmode2.
   2960     DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0);
   2961     AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2962                             DbgLoc, TII.get(ARM::LDRcp), DestReg1)
   2963                     .addConstantPoolIndex(Idx).addImm(0));
   2964     Opc = UseGOTOFF ? ARM::ADDrr : ARM::LDRrs;
   2965   }
   2966 
   2967   unsigned GlobalBaseReg = AFI->getGlobalBaseReg();
   2968   if (GlobalBaseReg == 0) {
   2969     GlobalBaseReg = MRI.createVirtualRegister(TLI.getRegClassFor(VT));
   2970     AFI->setGlobalBaseReg(GlobalBaseReg);
   2971   }
   2972 
   2973   unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT));
   2974   DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0);
   2975   DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1);
   2976   GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2);
   2977   MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
   2978                                     DbgLoc, TII.get(Opc), DestReg2)
   2979                             .addReg(DestReg1)
   2980                             .addReg(GlobalBaseReg);
   2981   if (!UseGOTOFF)
   2982     MIB.addImm(0);
   2983   AddOptionalDefs(MIB);
   2984 
   2985   return DestReg2;
   2986 }
   2987 
   2988 bool ARMFastISel::fastLowerArguments() {
   2989   if (!FuncInfo.CanLowerReturn)
   2990     return false;
   2991 
   2992   const Function *F = FuncInfo.Fn;
   2993   if (F->isVarArg())
   2994     return false;
   2995 
   2996   CallingConv::ID CC = F->getCallingConv();
   2997   switch (CC) {
   2998   default:
   2999     return false;
   3000   case CallingConv::Fast:
   3001   case CallingConv::C:
   3002   case CallingConv::ARM_AAPCS_VFP:
   3003   case CallingConv::ARM_AAPCS:
   3004   case CallingConv::ARM_APCS:
   3005     break;
   3006   }
   3007 
   3008   // Only handle simple cases. i.e. Up to 4 i8/i16/i32 scalar arguments
   3009   // which are passed in r0 - r3.
   3010   unsigned Idx = 1;
   3011   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
   3012        I != E; ++I, ++Idx) {
   3013     if (Idx > 4)
   3014       return false;
   3015 
   3016     if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
   3017         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
   3018         F->getAttributes().hasAttribute(Idx, Attribute::ByVal))
   3019       return false;
   3020 
   3021     Type *ArgTy = I->getType();
   3022     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
   3023       return false;
   3024 
   3025     EVT ArgVT = TLI.getValueType(ArgTy);
   3026     if (!ArgVT.isSimple()) return false;
   3027     switch (ArgVT.getSimpleVT().SimpleTy) {
   3028     case MVT::i8:
   3029     case MVT::i16:
   3030     case MVT::i32:
   3031       break;
   3032     default:
   3033       return false;
   3034     }
   3035   }
   3036 
   3037 
   3038   static const uint16_t GPRArgRegs[] = {
   3039     ARM::R0, ARM::R1, ARM::R2, ARM::R3
   3040   };
   3041 
   3042   const TargetRegisterClass *RC = &ARM::rGPRRegClass;
   3043   Idx = 0;
   3044   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
   3045        I != E; ++I, ++Idx) {
   3046     unsigned SrcReg = GPRArgRegs[Idx];
   3047     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
   3048     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
   3049     // Without this, EmitLiveInCopies may eliminate the livein if its only
   3050     // use is a bitcast (which isn't turned into an instruction).
   3051     unsigned ResultReg = createResultReg(RC);
   3052     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3053             TII.get(TargetOpcode::COPY),
   3054             ResultReg).addReg(DstReg, getKillRegState(true));
   3055     updateValueMap(I, ResultReg);
   3056   }
   3057 
   3058   return true;
   3059 }
   3060 
   3061 namespace llvm {
   3062   FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo,
   3063                                 const TargetLibraryInfo *libInfo) {
   3064     const TargetMachine &TM = funcInfo.MF->getTarget();
   3065     const ARMSubtarget &STI =
   3066         static_cast<const ARMSubtarget &>(funcInfo.MF->getSubtarget());
   3067     // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl.
   3068     bool UseFastISel = false;
   3069     UseFastISel |= STI.isTargetMachO() && !STI.isThumb1Only();
   3070     UseFastISel |= STI.isTargetLinux() && !STI.isThumb();
   3071     UseFastISel |= STI.isTargetNaCl() && !STI.isThumb();
   3072 
   3073     if (UseFastISel) {
   3074       // iOS always has a FP for backtracking, force other targets
   3075       // to keep their FP when doing FastISel. The emitted code is
   3076       // currently superior, and in cases like test-suite's lencod
   3077       // FastISel isn't quite correct when FP is eliminated.
   3078       TM.Options.NoFramePointerElim = true;
   3079       return new ARMFastISel(funcInfo, libInfo);
   3080     }
   3081     return nullptr;
   3082   }
   3083 }
   3084