//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

namespace llvm {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
        : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
static const unsigned GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

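// addTypeForNEON - Configure operation actions for a vector type that has
// just been registered for NEON. Loads/stores are promoted to the wider
// PromotedLdStVT and bitwise ops to PromotedBitwiseVT, so one set of D/Q
// register patterns can serve all element types.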
void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction(VT.getSimpleVT(),
                          (MVT::SimpleValueType)InnerVT, Expand);
  }
  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

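// NEON D registers hold 64-bit vectors; Q registers hold 128-bit vectors.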
void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

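// Pick the object-file lowering: Mach-O sections on Darwin targets, the ARM
// ELF variant everywhere else.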
static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

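      // Each *vfp comparison helper returns nonzero when the tested relation
      // holds, so the libcall result is compared against zero with the
      // condition registered here; the "ordered" predicate reuses the
      // unordered helper with the inverted condition.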
      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
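    // __aeabi_dcmpeq returns nonzero iff its operands compare equal, so it
    // serves both OEQ (result != 0) and UNE (result == 0).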
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);

    // Floating-point to integer conversions.
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP supports any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
    // Neon does not have single-instruction SINT_TO_FP or UINT_TO_FP with a
    // destination type that is wider than the source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
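  // (pre-increment, pre-decrement, post-increment, and post-decrement).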
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  setOperationAction(ISD::MUL,     MVT::i64, Expand);
  setOperationAction(ISD::MULHU,   MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL,       MVT::i64, Custom);
  setOperationAction(ISD::SRA,       MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
  setOperationAction(ISD::EXCEPTIONADDR,      MVT::i32,   Expand);
  setExceptionPointerRegister(ARM::R0);
  setExceptionSelectorRegister(ARM::R1);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
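    // The custom lowering selects a DMB when the subtarget has one, falling
    // back to the CP15 barrier otherwise (see ARMISD::MEMBARRIER_MCR).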
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }
  // 64-bit versions are always libcalls (for now)
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

  setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD and i64->f64 into VMOVDRR
    // iff the target supports VFP2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
    setTargetDAGCombine(ISD::OR);
  if (Subtarget->hasNEON())
    setTargetDAGCombine(ISD::AND);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;

  setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
}

// FIXME: It might make sense to define the representative register class as the
// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
// SPR's representative would be DPR_VFP2. This would work well if register
// pressure tracking were modified such that a register use would increment the
// pressure of the register class's representative and all of its super
// classes' representatives transitively. We have not implemented this because
// of the difficulty, prior to coalescing, of modeling operand register classes
// due to the common occurrence of cross-class copies and subregister insertions
// and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as the representative register class for all floating-point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
  case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
  case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";

  case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
  case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
  case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
  case ARMISD::VTRN:          return "ARMISD::VTRN";
  case ARMISD::VTBL1:         return "ARMISD::VTBL1";
  case ARMISD::VTBL2:         return "ARMISD::VTBL2";
  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX:          return "ARMISD::FMAX";
  case ARMISD::FMIN:          return "ARMISD::FMIN";
  case ARMISD::BFI:           return "ARMISD::BFI";
  case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
  case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
  case ARMISD::VBSL:          return "ARMISD::VBSL";
  case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
  case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
  case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
  case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
  case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
  case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
  case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
  case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
  case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
  case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
  case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
  case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
  case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
  case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
  case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
  case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
  case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
  case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
  case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
  case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
  }
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
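/// The bounds appear to track the immediate offset range of the underlying
/// load/store encodings: 4095 matches the 12-bit offset of ARM-mode loads and
/// stores, while the Thumb1 bound is a conservative 127.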
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::Latency;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::Latency;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
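/// Some FP predicates need two ARM condition checks (e.g. SETONE is MI or GT);
/// CondCode2 carries the second condition, or AL when one condition suffices.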
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"
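// The generated include provides the TableGen-derived CC_ARM_* and
// RetCC_ARM_* assignment functions referenced below.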

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use the VFP variant of the calling
      // convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             FloatABIType == FloatABI::Hard && !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of the appropriate physical registers.
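/// An f64 result needs the custom handling below: it arrives as two i32
/// halves in consecutive locations (glued CopyFromReg nodes) and is rejoined
/// with VMOVDRR; a v2f64 result repeats this per lane, consuming four
/// locations.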
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return */ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physregs.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

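      // The f64 just built is only lane 0 of a v2f64; pull two more i32
      // halves out of the next two locations and insert them as lane 1.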
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}

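/// PassF64ArgInRegs - Split an f64 (or half of a v2f64) into two i32 halves
/// with VMOVRRD and pass them on; the second half can still land on the
/// stack when only one GPR is left for the pair.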
void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lower a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Temporarily disable tail calls so things don't break.
  if (!EnableARMTailCalls)
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                    IsStructRet, MF.getFunction()->hasStructRetAttr(),
                    Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return */ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into
    // pieces.
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (isByVal) {
      assert(VA.isMemLoc());
      unsigned offset = 0;

      // True if this byval aggregate will be split between registers
      // and memory.
      if (CCInfo.isFirstByValRegValid()) {
        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        unsigned i, j;
        for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }
        offset = ARM::R4 - CCInfo.getFirstByValReg();
        CCInfo.clearFirstByValReg();
      }
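      // For example, a 16-byte byval whose first register is r2 sends its
      // first 8 bytes in r2/r3 (offset == 2), so the memcpy below copies
      // only the remaining 8 bytes to the outgoing stack area.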

      unsigned LocMemOffset = VA.getLocMemOffset();
      SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
      SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                StkPtrOff);
      SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
      SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
      SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                         MVT::i32);
      MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                                          Flags.getByValAlign(),
                                          /*isVolatile=*/false,
                                          /*AlwaysInline=*/false,
                                          MachinePointerInfo(0),
                                          MachinePointerInfo(0)));
    } else if (!IsSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not glue the preceding copy-to-reg nodes to the nodes that follow.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is), turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert(getTargetMachine().getRelocationModel() == Reloc::Static &&
           "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address.
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register.
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S =
                 dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address.
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       Sym, ARMPCLabelIndex, 0);
      // Get the address of the callee into a register.
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      // On ELF targets for PIC code, direct calls should go through the PLT.
      unsigned OpFlags = 0;
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       Sym, ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      unsigned OpFlags = 0;
      // On ELF targets for PIC code, direct calls should go through the PLT.
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
    }
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
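  // Pre-v5T has no BLX, so an indirect call (or a Thumb caller reaching ARM
  // code) cannot set LR as part of the branch itself; CALL_NOLINK lets the
  // backend materialize the return address explicitly (e.g. mov lr, pc).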
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());

  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

/// HandleByVal - Every parameter *after* a byval parameter is passed
/// on the stack.  Remember the next parameter register to allocate,
/// and then confiscate the rest of the parameter registers to ensure
/// this.
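/// For example, if the next free register is r1, then r1-r3 are claimed for
/// the byval bytes; at a call site the byval's stack size also shrinks by
/// those 12 bytes, while in a prologue the full size is kept so the value
/// can be reassembled in memory.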
void
llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const {
  unsigned reg = State->AllocateReg(GPRArgRegs, 4);
  assert((State->getCallOrPrologue() == Prologue ||
          State->getCallOrPrologue() == Call) &&
         "unhandled ParmContext");
  if ((!State->isFirstByValRegValid()) &&
      (ARM::R0 <= reg) && (reg <= ARM::R3)) {
    State->setFirstByValReg(reg);
    // At a call site, a byval parameter that is split between
    // registers and memory needs its size truncated here.  In a
    // function prologue, such byval parameters are reassembled in
    // memory, and are not truncated.
    if (State->getCallOrPrologue() == Call) {
      unsigned excess = 4 * (ARM::R4 - reg);
      assert(size >= excess && "expected larger existing stack allocation");
      size -= excess;
    }
  }
  // Confiscate any remaining parameter registers to preclude their
  // assignment to subsequent parameters.
  while (State->AllocateReg(GPRArgRegs, 4))
    ;
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
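/// This is what lets a sibcall reuse an incoming stack argument in place:
/// the outgoing value must trace back to a load from (or a copy out of) a
/// fixed frame object with exactly the offset and size the callee expects.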
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const ARMInstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!TargetRegisterInfo::isVirtualRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced. e.g.
      // define @foo(%struct.X* %A) {
      //   tail call @bar(%struct.X* byval %A)
      // }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                                     bool isCalleeStructRet,
                                                     bool isCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG &DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall-optimize vararg calls unless the call site passes no
  // arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
  // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
  // support in the assembler and linker to be used. This would need to be
  // fixed to fully support tail calls in Thumb1.
  //
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR.  This means if we need to reload LR, it takes an extra instruction,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used.  Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.

  // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
  // but we need to make sure there are enough registers; the only valid
  // registers are the 4 used for parameters.  We don't currently handle this
  // case.
  if (Subtarget->isThumb1Only())
    return false;

  // If the calling conventions do not match, then we'd better make sure the
  // results are returned in the same way as the caller expects.
  if (!CCMatch) {
    SmallVector<CCValAssign, 16> RVLocs1;
    ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
    CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));

    SmallVector<CCValAssign, 16> RVLocs2;
    ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
                       getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
    CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));

    if (RVLocs1.size() != RVLocs2.size())
      return false;
    for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
      if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
        return false;
      if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
        return false;
      if (RVLocs1[i].isRegLoc()) {
        if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
          return false;
      } else {
        if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
          return false;
      }
    }
  }

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
    CCInfo.AnalyzeCallOperands(Outs,
                               CCAssignFnForNode(CalleeCC, false, isVarArg));
    if (CCInfo.getNextStackOffset()) {
      MachineFunction &MF = DAG.getMachineFunction();

      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo *MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const ARMInstrInfo *TII =
        ((ARMTargetMachine&)getTargetMachine()).getInstrInfo();
      for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
           i != e;
           ++i, ++realArgIdx) {
        CCValAssign &VA = ArgLocs[i];
        EVT RegVT = VA.getLocVT();
        SDValue Arg = OutVals[realArgIdx];
        ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (VA.needsCustom()) {
          // f64 and vector types are split into multiple registers or
          // register/stack-slot combinations.  The types will not match
          // the registers; give up on memory f64 refs until we figure
          // out what to do about this.
          if (!VA.isRegLoc())
            return false;
          if (!ArgLocs[++i].isRegLoc())
            return false;
          if (RegVT == MVT::v2f64) {
            if (!ArgLocs[++i].isRegLoc())
              return false;
            if (!ArgLocs[++i].isRegLoc())
              return false;
          }
        } else if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII))
            return false;
        }
      }
    }
  }

  return true;
}

SDValue
ARMTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  // CCValAssign - represent the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slots.
  ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext(), Call);

  // Analyze outgoing return values.
  CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
                                               isVarArg));

  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      if (RVLocs[i].isRegLoc())
        DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0, realRVLocIdx = 0;
       i != RVLocs.size();
       ++i, ++realRVLocIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[realRVLocIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        // Extract the first half and return it in two registers.
        SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                   DAG.getConstant(0, MVT::i32));
        SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
                                       DAG.getVTList(MVT::i32, MVT::i32), Half);

        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc
        Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                                 HalfGPRs.getValue(1), Flag);
        Flag = Chain.getValue(1);
        VA = RVLocs[++i]; // skip ahead to next loc

        // Extract the 2nd half and fall through to handle it as an f64 value.
        Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                          DAG.getConstant(1, MVT::i32));
      }
      // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
      // available.
      SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                                  DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
                               Flag);
    } else
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);

    // Guarantee that all emitted copies are glued together, so nothing else
    // can be scheduled in between and clobber the return registers.
    Flag = Chain.getValue(1);
  }

  SDValue Result;
  if (Flag.getNode())
    Result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else // Return Void
    Result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain);

  return Result;
}

bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const {
  if (N->getNumValues() != 1)
    return false;
  if (!N->hasNUsesOfValue(1, 0))
    return false;

  unsigned NumCopies = 0;
  SDNode *Copies[2];
  SDNode *Use = *N->use_begin();
  if (Use->getOpcode() == ISD::CopyToReg) {
    Copies[NumCopies++] = Use;
  } else if (Use->getOpcode() == ARMISD::VMOVRRD) {
    // f64 returned in a pair of GPRs.
    for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() != ISD::CopyToReg)
        return false;
      Copies[UI.getUse().getResNo()] = *UI;
      ++NumCopies;
    }
  } else if (Use->getOpcode() == ISD::BITCAST) {
    // f32 returned in a single GPR.
    if (!Use->hasNUsesOfValue(1, 0))
      return false;
    Use = *Use->use_begin();
    if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0))
      return false;
    Copies[NumCopies++] = Use;
  } else {
    return false;
  }

  if (NumCopies != 1 && NumCopies != 2)
    return false;

  bool HasRet = false;
  for (unsigned i = 0; i < NumCopies; ++i) {
    SDNode *Copy = Copies[i];
    for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
         UI != UE; ++UI) {
      if (UI->getOpcode() == ISD::CopyToReg) {
        SDNode *Use = *UI;
        if (Use == Copies[0] || Use == Copies[1])
          continue;
        return false;
      }
      if (UI->getOpcode() != ARMISD::RET_FLAG)
        return false;
      HasRet = true;
    }
  }

  return HasRet;
}

bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
  if (!EnableARMTailCalls)
    return false;

  if (!CI->isTailCall())
    return false;

  return !Subtarget->isThumb1Only();
}

// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
// one of the above-mentioned nodes. It has to be wrapped because otherwise
// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
// be used to form an addressing mode. These wrapped nodes will be selected
// into MOVi.
static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
  EVT PtrVT = Op.getValueType();
  // FIXME: there is no actual debug info here.
  DebugLoc dl = Op.getDebugLoc();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  SDValue Res;
  if (CP->isMachineConstantPoolEntry())
    Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
                                    CP->getAlignment());
  else
    Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
                                    CP->getAlignment());
  return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
}

unsigned ARMTargetLowering::getJumpTableEncoding() const {
  return MachineJumpTableInfo::EK_Inline;
}

SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = 0;
  DebugLoc DL = Op.getDebugLoc();
  EVT PtrVT = getPointerTy();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
  } else {
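    // PC-relative materialization must correct for the pipeline offset:
    // reading PC yields the instruction address + 8 in ARM mode and + 4 in
    // Thumb mode, hence the PCAdj folded into the constant-pool entry.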
    unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMPCLabelIndex = AFI->createPICLabelUId();
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
                                                         ARMCP::CPBlockAddress,
                                                         PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, 0);
  if (RelocM == Reloc::Static)
    return Result;
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}

// Lower ISD::GlobalTLSAddress using the "general dynamic" model.
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
                                                 SelectionDAG &DAG) const {
  DebugLoc dl = GA->getDebugLoc();
  EVT PtrVT = getPointerTy();
  unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV =
    new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                             ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
  SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
  Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
  SDValue Chain = Argument.getValue(1);

  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);

  // Call __tls_get_addr.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Argument;
  Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
  Args.push_back(Entry);
  // FIXME: is there useful debug info available here?
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, (Type *) Type::getInt32Ty(*DAG.getContext()),
                false, false, false, false,
                0, CallingConv::C, false, /*isReturnValueUsed=*/true,
                DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
  return CallResult.first;
}

// Lower ISD::GlobalTLSAddress using the "initial exec" or
// "local exec" model.
SDValue
ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
                                        SelectionDAG &DAG) const {
  const GlobalValue *GV = GA->getGlobal();
  DebugLoc dl = GA->getDebugLoc();
  SDValue Offset;
  SDValue Chain = DAG.getEntryNode();
  EVT PtrVT = getPointerTy();
  // Get the Thread Pointer.
  SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);

  if (GV->isDeclaration()) {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    // Initial exec model.
    unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
                               ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
    Chain = Offset.getValue(1);

    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);

    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
  } else {
    // Local exec model.
    ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF);
    Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
    Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
                         MachinePointerInfo::getConstantPool(),
                         false, false, 0);
  }

  // The address of the thread local variable is the thread pointer plus the
  // variable's offset.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}

SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
  // TODO: implement the "local dynamic" model
  assert(Subtarget->isTargetELF() &&
         "TLS not implemented for non-ELF targets");
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  // If the relocation model is PIC, use the "general dynamic" TLS model;
  // otherwise use the initial/local exec models.
  if (getTargetMachine().getRelocationModel() == Reloc::PIC_)
    return LowerToTLSGeneralDynamicModel(GA, DAG);
  else
    return LowerToTLSExecModels(GA, DAG);
}

SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
                                                 SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  if (RelocM == Reloc::PIC_) {
    bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
    SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                                 CPAddr,
                                 MachinePointerInfo::getConstantPool(),
                                 false, false, 0);
    SDValue Chain = Result.getValue(1);
    SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
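    // With GOTOFF, Result now holds the symbol's address itself (GOT base +
    // offset); with GOT, it holds the address of the GOT slot, so one more
    // load is needed to fetch the actual symbol address.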
    if (!UseGOTOFF)
      Result = DAG.getLoad(PtrVT, dl, Chain, Result,
                           MachinePointerInfo::getGOT(), false, false, 0);
    return Result;
  }

  // If we have T2 ops, we can materialize the address directly via a
  // movw/movt pair. This is always cheaper.
  if (Subtarget->useMovt()) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
    return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                       DAG.getTargetGlobalAddress(GV, dl, PtrVT));
  } else {
    SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                       MachinePointerInfo::getConstantPool(),
                       false, false, 0);
  }
}

SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
                                                    SelectionDAG &DAG) const {
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  // FIXME: Enable this for static codegen when tool issues are fixed.
  if (Subtarget->useMovt() && RelocM != Reloc::Static) {
    ++NumMovwMovt;
    // FIXME: Once remat is capable of dealing with instructions with register
    // operands, expand this into two nodes.
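    // Note: this Reloc::Static path is currently unreachable because of the
    // RelocM != Reloc::Static guard above; it is kept for when the FIXME is
    // resolved and static codegen is enabled.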
    if (RelocM == Reloc::Static)
      return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
                         DAG.getTargetGlobalAddress(GV, dl, PtrVT));

    unsigned Wrapper = (RelocM == Reloc::PIC_)
      ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
    SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
                                 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
    if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
      Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                           MachinePointerInfo::getGOT(), false, false, 0);
    return Result;
  }

  unsigned ARMPCLabelIndex = 0;
  SDValue CPAddr;
  if (RelocM == Reloc::Static) {
    CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
  } else {
    ARMPCLabelIndex = AFI->createPICLabelUId();
    unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  }
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);

  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, 0);
  SDValue Chain = Result.getValue(1);

  if (RelocM == Reloc::PIC_) {
    SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
    Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
  }

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
    Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
                         false, false, 0);

  return Result;
}

SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
                                                    SelectionDAG &DAG) const {
  assert(Subtarget->isTargetELF() &&
         "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
  MachineFunction &MF = DAG.getMachineFunction();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
  EVT PtrVT = getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       "_GLOBAL_OFFSET_TABLE_",
                                                       ARMPCLabelIndex, PCAdj);
  SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
  CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
  SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                               MachinePointerInfo::getConstantPool(),
                               false, false, 0);
  SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
  return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG)
  const {
  DebugLoc dl = Op.getDebugLoc();
  return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  SDValue Val = DAG.getConstant(0, MVT::i32);
  return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0),
                     Op.getOperand(1), Val);
}

SDValue
ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
                     Op.getOperand(1), DAG.getConstant(0, MVT::i32));
}

SDValue
ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
                                          const ARMSubtarget *Subtarget) const {
  unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  DebugLoc dl = Op.getDebugLoc();
  switch (IntNo) {
  default: return SDValue();    // Don't custom lower most intrinsics.
  case Intrinsic::arm_thread_pointer: {
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
  }
  case Intrinsic::eh_sjlj_lsda: {
    MachineFunction &MF = DAG.getMachineFunction();
    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
    unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
    EVT PtrVT = getPointerTy();
    DebugLoc dl = Op.getDebugLoc();
    Reloc::Model RelocM = getTargetMachine().getRelocationModel();
    SDValue CPAddr;
    unsigned PCAdj = (RelocM != Reloc::PIC_)
      ? 0 : (Subtarget->isThumb() ? 4 : 8);
    ARMConstantPoolValue *CPV =
      new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex,
                               ARMCP::CPLSDA, PCAdj);
    CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
    CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
    SDValue Result =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
                  MachinePointerInfo::getConstantPool(),
                  false, false, 0);

    if (RelocM == Reloc::PIC_) {
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
    }
    return Result;
  }
  case Intrinsic::arm_neon_vmulls:
  case Intrinsic::arm_neon_vmullu: {
   2244     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
   2245       ? ARMISD::VMULLs : ARMISD::VMULLu;
   2246     return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
   2247                        Op.getOperand(1), Op.getOperand(2));
   2248   }
   2249   }
   2250 }
   2251 
   2252 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
   2253                                const ARMSubtarget *Subtarget) {
   2254   DebugLoc dl = Op.getDebugLoc();
   2255   if (!Subtarget->hasDataBarrier()) {
   2256     // Some ARMv6 CPUs can support data barriers with an mcr instruction.
   2257     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
   2258     // here.
   2259     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
   2260            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
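            // On ARMv6 the data memory barrier is a CP15 operation,
            // "mcr p15, 0, <Rd>, c7, c10, 5", with <Rd> conventionally zero.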
   2261     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
   2262                        DAG.getConstant(0, MVT::i32));
   2263   }
   2264 
   2265   SDValue Op5 = Op.getOperand(5);
   2266   bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
   2267   unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   2268   unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
   2269   bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
   2270 
   2271   ARM_MB::MemBOpt DMBOpt;
   2272   if (isDeviceBarrier)
   2273     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
   2274   else
   2275     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
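          // SY is a full-system barrier and ST orders only stores; the ISH
          // (inner-shareable) variants limit the barrier's scope to the
          // inner shareable domain.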
   2276   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
   2277                      DAG.getConstant(DMBOpt, MVT::i32));
   2278 }
   2279 
   2280 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
   2281                              const ARMSubtarget *Subtarget) {
   2282   // ARM pre-v5TE and Thumb1 do not have preload instructions.
   2283   if (!(Subtarget->isThumb2() ||
   2284         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
   2285     // Just preserve the chain.
   2286     return Op.getOperand(0);
   2287 
   2288   DebugLoc dl = Op.getDebugLoc();
   2289   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
   2290   if (!isRead &&
   2291       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
   2292     // ARMv7 with MP extension has PLDW.
   2293     return Op.getOperand(0);
   2294 
   2295   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
   2296   if (Subtarget->isThumb()) {
   2297     // Invert the bits.
   2298     isRead = ~isRead & 1;
   2299     isData = ~isData & 1;
   2300   }
   2301 
   2302   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
   2303                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
   2304                      DAG.getConstant(isData, MVT::i32));
   2305 }
   2306 
   2307 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
   2308   MachineFunction &MF = DAG.getMachineFunction();
   2309   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
   2310 
   2311   // vastart just stores the address of the VarArgsFrameIndex slot into the
   2312   // memory location argument.
   2313   DebugLoc dl = Op.getDebugLoc();
   2314   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2315   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   2316   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   2317   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
   2318                       MachinePointerInfo(SV), false, false, 0);
   2319 }
   2320 
   2321 SDValue
   2322 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
   2323                                         SDValue &Root, SelectionDAG &DAG,
   2324                                         DebugLoc dl) const {
   2325   MachineFunction &MF = DAG.getMachineFunction();
   2326   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2327 
   2328   TargetRegisterClass *RC;
   2329   if (AFI->isThumb1OnlyFunction())
   2330     RC = ARM::tGPRRegisterClass;
   2331   else
   2332     RC = ARM::GPRRegisterClass;
   2333 
   2334   // Transform the arguments stored in physical registers into virtual ones.
   2335   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2336   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2337 
   2338   SDValue ArgValue2;
   2339   if (NextVA.isMemLoc()) {
   2340     MachineFrameInfo *MFI = MF.getFrameInfo();
   2341     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
   2342 
   2343     // Create load node to retrieve arguments from the stack.
   2344     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2345     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
   2346                             MachinePointerInfo::getFixedStack(FI),
   2347                             false, false, 0);
   2348   } else {
   2349     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
   2350     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2351   }
   2352 
   2353   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
   2354 }
   2355 
   2356 void
   2357 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
   2358                                   unsigned &VARegSize, unsigned &VARegSaveSize)
   2359   const {
   2360   unsigned NumGPRs;
   2361   if (CCInfo.isFirstByValRegValid())
   2362     NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
   2363   else {
   2364     unsigned firstUnalloced =
   2365       CCInfo.getFirstUnallocated(GPRArgRegs,
   2366                                  sizeof(GPRArgRegs) /
   2367                                  sizeof(GPRArgRegs[0]));
   2368     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
   2369   }
   2370 
   2371   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
   2372   VARegSize = NumGPRs * 4;
   2373   VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
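          // E.g. with three registers left to save and an 8-byte stack
          // alignment: VARegSize = 12, VARegSaveSize = (12 + 7) & ~7 = 16.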
   2374 }
   2375 
   2376 // The remaining GPRs hold either the beginning of variable-argument
   2377 // data, or the beginning of an aggregate passed by value (usually
   2378 // byval).  Either way, we allocate stack slots adjacent to the data
   2379 // provided by our caller, and store the unallocated registers there.
   2380 // If this is a variadic function, the va_list pointer will begin with
   2381 // these values; otherwise, this reassembles a (byval) structure that
   2382 // was split between registers and memory.
   2383 void
   2384 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   2385                                         DebugLoc dl, SDValue &Chain,
   2386                                         unsigned ArgOffset) const {
   2387   MachineFunction &MF = DAG.getMachineFunction();
   2388   MachineFrameInfo *MFI = MF.getFrameInfo();
   2389   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2390   unsigned firstRegToSaveIndex;
   2391   if (CCInfo.isFirstByValRegValid())
   2392     firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
   2393   else {
   2394     firstRegToSaveIndex = CCInfo.getFirstUnallocated
   2395       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
   2396   }
   2397 
   2398   unsigned VARegSize, VARegSaveSize;
   2399   computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
   2400   if (VARegSaveSize) {
   2401     // If this function is vararg, store any remaining integer argument regs
   2402     // to their spots on the stack so that they may be loaded by dereferencing
   2403     // the result of va_arg.
   2404     AFI->setVarArgsRegSaveSize(VARegSaveSize);
   2405     AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
   2406                                                      ArgOffset + VARegSaveSize
   2407                                                      - VARegSize,
   2408                                                      false));
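            // The offset is biased by (VARegSaveSize - VARegSize) so the
            // saved registers stay contiguous with any varargs passed on the
            // stack; the alignment padding falls outside the live area.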
   2409     SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
   2410                                     getPointerTy());
   2411 
   2412     SmallVector<SDValue, 4> MemOps;
   2413     for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) {
   2414       TargetRegisterClass *RC;
   2415       if (AFI->isThumb1OnlyFunction())
   2416         RC = ARM::tGPRRegisterClass;
   2417       else
   2418         RC = ARM::GPRRegisterClass;
   2419 
   2420       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
   2421       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
   2422       SDValue Store =
   2423         DAG.getStore(Val.getValue(1), dl, Val, FIN,
   2424                  MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()),
   2425                      false, false, 0);
   2426       MemOps.push_back(Store);
   2427       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
   2428                         DAG.getConstant(4, getPointerTy()));
   2429     }
   2430     if (!MemOps.empty())
   2431       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   2432                           &MemOps[0], MemOps.size());
   2433   } else
   2434     // This will point to the next argument passed via stack.
   2435     AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
   2436 }
   2437 
   2438 SDValue
   2439 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   2440                                         CallingConv::ID CallConv, bool isVarArg,
   2441                                         const SmallVectorImpl<ISD::InputArg>
   2442                                           &Ins,
   2443                                         DebugLoc dl, SelectionDAG &DAG,
   2444                                         SmallVectorImpl<SDValue> &InVals)
   2445                                           const {
   2446   MachineFunction &MF = DAG.getMachineFunction();
   2447   MachineFrameInfo *MFI = MF.getFrameInfo();
   2448 
   2449   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2450 
   2451   // Assign locations to all of the incoming arguments.
   2452   SmallVector<CCValAssign, 16> ArgLocs;
   2453   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2454                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
   2455   CCInfo.AnalyzeFormalArguments(Ins,
   2456                                 CCAssignFnForNode(CallConv, /* Return*/ false,
   2457                                                   isVarArg));
   2458 
   2459   SmallVector<SDValue, 16> ArgValues;
   2460   int lastInsIndex = -1;
   2461 
   2462   SDValue ArgValue;
   2463   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   2464     CCValAssign &VA = ArgLocs[i];
   2465 
   2466     // Arguments stored in registers.
   2467     if (VA.isRegLoc()) {
   2468       EVT RegVT = VA.getLocVT();
   2469 
   2470       if (VA.needsCustom()) {
   2471         // f64 and vector types are split up into multiple registers or
   2472         // combinations of registers and stack slots.
   2473         if (VA.getLocVT() == MVT::v2f64) {
   2474           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
   2475                                                    Chain, DAG, dl);
   2476           VA = ArgLocs[++i]; // skip ahead to next loc
   2477           SDValue ArgValue2;
   2478           if (VA.isMemLoc()) {
   2479             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
   2480             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2481             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
   2482                                     MachinePointerInfo::getFixedStack(FI),
   2483                                     false, false, 0);
   2484           } else {
   2485             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
   2486                                              Chain, DAG, dl);
   2487           }
   2488           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
   2489           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2490                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
   2491           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2492                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
   2493         } else
   2494           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
   2495 
   2496       } else {
   2497         TargetRegisterClass *RC;
   2498 
   2499         if (RegVT == MVT::f32)
   2500           RC = ARM::SPRRegisterClass;
   2501         else if (RegVT == MVT::f64)
   2502           RC = ARM::DPRRegisterClass;
   2503         else if (RegVT == MVT::v2f64)
   2504           RC = ARM::QPRRegisterClass;
   2505         else if (RegVT == MVT::i32)
   2506           RC = (AFI->isThumb1OnlyFunction() ?
   2507                 ARM::tGPRRegisterClass : ARM::GPRRegisterClass);
   2508         else
   2509           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
   2510 
   2511         // Transform the arguments in physical registers into virtual ones.
   2512         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2513         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
   2514       }
   2515 
   2516       // If this is an 8 or 16-bit value, it is really passed promoted
   2517       // to 32 bits.  Insert an assert[sz]ext to capture this, then
   2518       // truncate to the right size.
   2519       switch (VA.getLocInfo()) {
   2520       default: llvm_unreachable("Unknown loc info!");
   2521       case CCValAssign::Full: break;
   2522       case CCValAssign::BCvt:
   2523         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
   2524         break;
   2525       case CCValAssign::SExt:
   2526         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
   2527                                DAG.getValueType(VA.getValVT()));
   2528         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2529         break;
   2530       case CCValAssign::ZExt:
   2531         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
   2532                                DAG.getValueType(VA.getValVT()));
   2533         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2534         break;
   2535       }
   2536 
   2537       InVals.push_back(ArgValue);
   2538 
   2539     } else { // VA.isRegLoc()
   2540 
   2541       // sanity check
   2542       assert(VA.isMemLoc());
   2543       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
   2544 
   2545       int index = ArgLocs[i].getValNo();
   2546 
   2547       // Some Ins[] entries become multiple ArgLoc[] entries.
   2548       // Process them only once.
   2549       if (index != lastInsIndex)
   2550         {
   2551           ISD::ArgFlagsTy Flags = Ins[index].Flags;
   2552           // FIXME: For now, all byval parameter objects are marked mutable.
   2553           // This can be changed with more analysis.
   2554           // In case of tail call optimization, mark all arguments mutable,
   2555           // since they could be overwritten by the lowering of arguments in
   2556           // case of a tail call.
   2557           if (Flags.isByVal()) {
   2558             unsigned VARegSize, VARegSaveSize;
   2559             computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
   2560             VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0);
   2561             unsigned Bytes = Flags.getByValSize() - VARegSize;
   2562             if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
   2563             int FI = MFI->CreateFixedObject(Bytes,
   2564                                             VA.getLocMemOffset(), false);
   2565             InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
   2566           } else {
   2567             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
   2568                                             VA.getLocMemOffset(), true);
   2569 
   2570             // Create load nodes to retrieve arguments from the stack.
   2571             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2572             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
   2573                                          MachinePointerInfo::getFixedStack(FI),
   2574                                          false, false, 0));
   2575           }
   2576           lastInsIndex = index;
   2577         }
   2578     }
   2579   }
   2580 
   2581   // varargs
   2582   if (isVarArg)
   2583     VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset());
   2584 
   2585   return Chain;
   2586 }
   2587 
   2588 /// isFloatingPointZero - Return true if this is +0.0.
   2589 static bool isFloatingPointZero(SDValue Op) {
   2590   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
   2591     return CFP->getValueAPF().isPosZero();
   2592   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
   2593     // Maybe this has already been legalized into the constant pool?
   2594     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
   2595       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
   2596       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
   2597         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
   2598           return CFP->getValueAPF().isPosZero();
   2599     }
   2600   }
   2601   return false;
   2602 }
   2603 
   2604 /// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
   2605 /// the given operands.
   2606 SDValue
   2607 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   2608                              SDValue &ARMcc, SelectionDAG &DAG,
   2609                              DebugLoc dl) const {
   2610   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
   2611     unsigned C = RHSC->getZExtValue();
   2612     if (!isLegalICmpImmediate(C)) {
   2613       // Constant does not fit, try adjusting it by one?
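              // E.g. (x < 0x1001): 0x1001 is not a valid ARM modified
              // immediate, but the equivalent (x <= 0x1000) is encodable.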
   2614       switch (CC) {
   2615       default: break;
   2616       case ISD::SETLT:
   2617       case ISD::SETGE:
   2618         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
   2619           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
   2620           RHS = DAG.getConstant(C-1, MVT::i32);
   2621         }
   2622         break;
   2623       case ISD::SETULT:
   2624       case ISD::SETUGE:
   2625         if (C != 0 && isLegalICmpImmediate(C-1)) {
   2626           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
   2627           RHS = DAG.getConstant(C-1, MVT::i32);
   2628         }
   2629         break;
   2630       case ISD::SETLE:
   2631       case ISD::SETGT:
   2632         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
   2633           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
   2634           RHS = DAG.getConstant(C+1, MVT::i32);
   2635         }
   2636         break;
   2637       case ISD::SETULE:
   2638       case ISD::SETUGT:
   2639         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
   2640           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
   2641           RHS = DAG.getConstant(C+1, MVT::i32);
   2642         }
   2643         break;
   2644       }
   2645     }
   2646   }
   2647 
   2648   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   2649   ARMISD::NodeType CompareType;
   2650   switch (CondCode) {
   2651   default:
   2652     CompareType = ARMISD::CMP;
   2653     break;
   2654   case ARMCC::EQ:
   2655   case ARMCC::NE:
   2656     // Uses only Z Flag
   2657     CompareType = ARMISD::CMPZ;
   2658     break;
   2659   }
   2660   ARMcc = DAG.getConstant(CondCode, MVT::i32);
   2661   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
   2662 }
   2663 
   2664 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
   2665 SDValue
   2666 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
   2667                              DebugLoc dl) const {
   2668   SDValue Cmp;
   2669   if (!isFloatingPointZero(RHS))
   2670     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
   2671   else
   2672     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
   2673   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
   2674 }
   2675 
   2676 /// duplicateCmp - Glue values can have only one use, so this function
   2677 /// duplicates a comparison node.
   2678 SDValue
   2679 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
   2680   unsigned Opc = Cmp.getOpcode();
   2681   DebugLoc DL = Cmp.getDebugLoc();
   2682   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
   2683     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   2684 
   2685   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
   2686   Cmp = Cmp.getOperand(0);
   2687   Opc = Cmp.getOpcode();
   2688   if (Opc == ARMISD::CMPFP)
   2689     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   2690   else {
   2691     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
   2692     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
   2693   }
   2694   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
   2695 }
   2696 
   2697 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   2698   SDValue Cond = Op.getOperand(0);
   2699   SDValue SelectTrue = Op.getOperand(1);
   2700   SDValue SelectFalse = Op.getOperand(2);
   2701   DebugLoc dl = Op.getDebugLoc();
   2702 
   2703   // Convert:
   2704   //
   2705   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
   2706   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
   2707   //
   2708   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
   2709     const ConstantSDNode *CMOVTrue =
   2710       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
   2711     const ConstantSDNode *CMOVFalse =
   2712       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
   2713 
   2714     if (CMOVTrue && CMOVFalse) {
   2715       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
   2716       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
   2717 
   2718       SDValue True;
   2719       SDValue False;
   2720       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
   2721         True = SelectTrue;
   2722         False = SelectFalse;
   2723       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
   2724         True = SelectFalse;
   2725         False = SelectTrue;
   2726       }
   2727 
   2728       if (True.getNode() && False.getNode()) {
   2729         EVT VT = Op.getValueType();
   2730         SDValue ARMcc = Cond.getOperand(2);
   2731         SDValue CCR = Cond.getOperand(3);
   2732         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
   2733         assert(True.getValueType() == VT);
   2734         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
   2735       }
   2736     }
   2737   }
   2738 
   2739   return DAG.getSelectCC(dl, Cond,
   2740                          DAG.getConstant(0, Cond.getValueType()),
   2741                          SelectTrue, SelectFalse, ISD::SETNE);
   2742 }
   2743 
   2744 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   2745   EVT VT = Op.getValueType();
   2746   SDValue LHS = Op.getOperand(0);
   2747   SDValue RHS = Op.getOperand(1);
   2748   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   2749   SDValue TrueVal = Op.getOperand(2);
   2750   SDValue FalseVal = Op.getOperand(3);
   2751   DebugLoc dl = Op.getDebugLoc();
   2752 
   2753   if (LHS.getValueType() == MVT::i32) {
   2754     SDValue ARMcc;
   2755     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   2756     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   2757     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
   2758   }
   2759 
   2760   ARMCC::CondCodes CondCode, CondCode2;
   2761   FPCCToARMCC(CC, CondCode, CondCode2);
   2762 
   2763   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   2764   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   2765   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   2766   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
   2767                                ARMcc, CCR, Cmp);
   2768   if (CondCode2 != ARMCC::AL) {
   2769     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
   2770     // FIXME: Needs another CMP because flag can have but one use.
   2771     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
   2772     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
   2773                          Result, TrueVal, ARMcc2, CCR, Cmp2);
   2774   }
   2775   return Result;
   2776 }
   2777 
   2778 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
   2779 /// to morph to an integer compare sequence.
   2780 static bool canChangeToInt(SDValue Op, bool &SeenZero,
   2781                            const ARMSubtarget *Subtarget) {
   2782   SDNode *N = Op.getNode();
   2783   if (!N->hasOneUse())
   2784     // Otherwise it requires moving the value from fp to integer registers.
   2785     return false;
   2786   if (!N->getNumValues())
   2787     return false;
   2788   EVT VT = Op.getValueType();
   2789   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
   2790     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
   2791     // vmrs are very slow, e.g. cortex-a8.
   2792     return false;
   2793 
   2794   if (isFloatingPointZero(Op)) {
   2795     SeenZero = true;
   2796     return true;
   2797   }
   2798   return ISD::isNormalLoad(N);
   2799 }
   2800 
   2801 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
   2802   if (isFloatingPointZero(Op))
   2803     return DAG.getConstant(0, MVT::i32);
   2804 
   2805   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
   2806     return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   2807                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
   2808                        Ld->isVolatile(), Ld->isNonTemporal(),
   2809                        Ld->getAlignment());
   2810 
   2811   llvm_unreachable("Unknown VFP cmp argument!");
   2812 }
   2813 
   2814 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
   2815                            SDValue &RetVal1, SDValue &RetVal2) {
   2816   if (isFloatingPointZero(Op)) {
   2817     RetVal1 = DAG.getConstant(0, MVT::i32);
   2818     RetVal2 = DAG.getConstant(0, MVT::i32);
   2819     return;
   2820   }
   2821 
   2822   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
   2823     SDValue Ptr = Ld->getBasePtr();
   2824     RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   2825                           Ld->getChain(), Ptr,
   2826                           Ld->getPointerInfo(),
   2827                           Ld->isVolatile(), Ld->isNonTemporal(),
   2828                           Ld->getAlignment());
   2829 
   2830     EVT PtrType = Ptr.getValueType();
   2831     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
   2832     SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
   2833                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
   2834     RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   2835                           Ld->getChain(), NewPtr,
   2836                           Ld->getPointerInfo().getWithOffset(4),
   2837                           Ld->isVolatile(), Ld->isNonTemporal(),
   2838                           NewAlign);
   2839     return;
   2840   }
   2841 
   2842   llvm_unreachable("Unknown VFP cmp argument!");
   2843 }
   2844 
   2845 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
   2846 /// f32 and even f64 comparisons to integer ones.
   2847 SDValue
   2848 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
   2849   SDValue Chain = Op.getOperand(0);
   2850   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   2851   SDValue LHS = Op.getOperand(2);
   2852   SDValue RHS = Op.getOperand(3);
   2853   SDValue Dest = Op.getOperand(4);
   2854   DebugLoc dl = Op.getDebugLoc();
   2855 
   2856   bool SeenZero = false;
   2857   if (canChangeToInt(LHS, SeenZero, Subtarget) &&
   2858       canChangeToInt(RHS, SeenZero, Subtarget) &&
   2859       // If one of the operands is zero, it's safe to ignore the NaN case since
   2860       // we only care about equality comparisons.
   2861       (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
   2862     // If unsafe fp math optimization is enabled and there are no other uses of
   2863     // the CMP operands, and the condition code is EQ or NE, we can optimize it
   2864     // to an integer comparison.
   2865     if (CC == ISD::SETOEQ)
   2866       CC = ISD::SETEQ;
   2867     else if (CC == ISD::SETUNE)
   2868       CC = ISD::SETNE;
   2869 
   2870     SDValue ARMcc;
   2871     if (LHS.getValueType() == MVT::f32) {
   2872       LHS = bitcastf32Toi32(LHS, DAG);
   2873       RHS = bitcastf32Toi32(RHS, DAG);
   2874       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   2875       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   2876       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   2877                          Chain, Dest, ARMcc, CCR, Cmp);
   2878     }
   2879 
   2880     SDValue LHS1, LHS2;
   2881     SDValue RHS1, RHS2;
   2882     expandf64Toi32(LHS, DAG, LHS1, LHS2);
   2883     expandf64Toi32(RHS, DAG, RHS1, RHS2);
   2884     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   2885     ARMcc = DAG.getConstant(CondCode, MVT::i32);
   2886     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   2887     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
   2888     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
   2889   }
   2890 
   2891   return SDValue();
   2892 }
   2893 
   2894 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   2895   SDValue Chain = Op.getOperand(0);
   2896   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   2897   SDValue LHS = Op.getOperand(2);
   2898   SDValue RHS = Op.getOperand(3);
   2899   SDValue Dest = Op.getOperand(4);
   2900   DebugLoc dl = Op.getDebugLoc();
   2901 
   2902   if (LHS.getValueType() == MVT::i32) {
   2903     SDValue ARMcc;
   2904     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   2905     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   2906     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   2907                        Chain, Dest, ARMcc, CCR, Cmp);
   2908   }
   2909 
   2910   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
   2911 
   2912   if (UnsafeFPMath &&
   2913       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
   2914        CC == ISD::SETNE || CC == ISD::SETUNE)) {
   2915     SDValue Result = OptimizeVFPBrcond(Op, DAG);
   2916     if (Result.getNode())
   2917       return Result;
   2918   }
   2919 
   2920   ARMCC::CondCodes CondCode, CondCode2;
   2921   FPCCToARMCC(CC, CondCode, CondCode2);
   2922 
   2923   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   2924   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   2925   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   2926   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   2927   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
   2928   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   2929   if (CondCode2 != ARMCC::AL) {
   2930     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
   2931     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
   2932     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   2933   }
   2934   return Res;
   2935 }
   2936 
   2937 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   2938   SDValue Chain = Op.getOperand(0);
   2939   SDValue Table = Op.getOperand(1);
   2940   SDValue Index = Op.getOperand(2);
   2941   DebugLoc dl = Op.getDebugLoc();
   2942 
   2943   EVT PTy = getPointerTy();
   2944   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
   2945   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
   2946   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
   2947   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
   2948   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
   2949   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
   2950   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
   2951   if (Subtarget->isThumb2()) {
   2952     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
   2953     // which does another jump to the destination. This also makes it easier
   2954     // to translate it to TBB / TBH later.
   2955     // FIXME: This might not work if the function is extremely large.
   2956     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
   2957                        Addr, Op.getOperand(2), JTI, UId);
   2958   }
   2959   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
   2960     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
   2961                        MachinePointerInfo::getJumpTable(),
   2962                        false, false, 0);
   2963     Chain = Addr.getValue(1);
   2964     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
   2965     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   2966   } else {
   2967     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
   2968                        MachinePointerInfo::getJumpTable(), false, false, 0);
   2969     Chain = Addr.getValue(1);
   2970     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   2971   }
   2972 }
   2973 
   2974 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   2975   DebugLoc dl = Op.getDebugLoc();
   2976   unsigned Opc;
   2977 
   2978   switch (Op.getOpcode()) {
   2979   default:
   2980     llvm_unreachable("Invalid opcode!");
   2981   case ISD::FP_TO_SINT:
   2982     Opc = ARMISD::FTOSI;
   2983     break;
   2984   case ISD::FP_TO_UINT:
   2985     Opc = ARMISD::FTOUI;
   2986     break;
   2987   }
   2988   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
   2989   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
   2990 }
   2991 
   2992 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   2993   EVT VT = Op.getValueType();
   2994   DebugLoc dl = Op.getDebugLoc();
   2995 
   2996   EVT OperandVT = Op.getOperand(0).getValueType();
   2997   assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!");
   2998   if (VT != MVT::v4f32)
   2999     return DAG.UnrollVectorOp(Op.getNode());
   3000 
   3001   unsigned CastOpc;
   3002   unsigned Opc;
   3003   switch (Op.getOpcode()) {
   3004   default:
   3005     llvm_unreachable("Invalid opcode!");
   3006   case ISD::SINT_TO_FP:
   3007     CastOpc = ISD::SIGN_EXTEND;
   3008     Opc = ISD::SINT_TO_FP;
   3009     break;
   3010   case ISD::UINT_TO_FP:
   3011     CastOpc = ISD::ZERO_EXTEND;
   3012     Opc = ISD::UINT_TO_FP;
   3013     break;
   3014   }
   3015 
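          // Widen v4i16 to v4i32 with the matching extension, then let the
          // ordinary v4i32 -> v4f32 conversion (NEON VCVT) finish the job.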
   3016   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
   3017   return DAG.getNode(Opc, dl, VT, Op);
   3018 }
   3019 
   3020 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   3021   EVT VT = Op.getValueType();
   3022   if (VT.isVector())
   3023     return LowerVectorINT_TO_FP(Op, DAG);
   3024 
   3025   DebugLoc dl = Op.getDebugLoc();
   3026   unsigned Opc;
   3027 
   3028   switch (Op.getOpcode()) {
   3029   default:
   3030     llvm_unreachable("Invalid opcode!");
   3031   case ISD::SINT_TO_FP:
   3032     Opc = ARMISD::SITOF;
   3033     break;
   3034   case ISD::UINT_TO_FP:
   3035     Opc = ARMISD::UITOF;
   3036     break;
   3037   }
   3038 
   3039   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   3040   return DAG.getNode(Opc, dl, VT, Op);
   3041 }
   3042 
   3043 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   3044   // Implement fcopysign with a fabs and a conditional fneg.
   3045   SDValue Tmp0 = Op.getOperand(0);
   3046   SDValue Tmp1 = Op.getOperand(1);
   3047   DebugLoc dl = Op.getDebugLoc();
   3048   EVT VT = Op.getValueType();
   3049   EVT SrcVT = Tmp1.getValueType();
   3050   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
   3051     Tmp0.getOpcode() == ARMISD::VMOVDRR;
   3052   bool UseNEON = !InGPR && Subtarget->hasNEON();
   3053 
   3054   if (UseNEON) {
   3055     // Use VBSL to copy the sign bit.
   3056     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
   3057     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
   3058                                DAG.getTargetConstant(EncodedVal, MVT::i32));
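            // The modified immediate (op 0x6, value 0x80) materializes
            // 0x80000000 in each 32-bit lane: a mask of just the sign bits.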
   3059     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
   3060     if (VT == MVT::f64)
   3061       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3062                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
   3063                          DAG.getConstant(32, MVT::i32));
   3064     else /*if (VT == MVT::f32)*/
   3065       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
   3066     if (SrcVT == MVT::f32) {
   3067       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
   3068       if (VT == MVT::f64)
   3069         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3070                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
   3071                            DAG.getConstant(32, MVT::i32));
   3072     } else if (VT == MVT::f32)
   3073       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
   3074                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
   3075                          DAG.getConstant(32, MVT::i32));
   3076     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
   3077     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
   3078 
   3079     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
   3080                                             MVT::i32);
   3081     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
   3082     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
   3083                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
   3084 
   3085     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
   3086                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
   3087                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
   3088     if (VT == MVT::f32) {
   3089       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
   3090       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
   3091                         DAG.getConstant(0, MVT::i32));
   3092     } else {
   3093       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
   3094     }
   3095 
   3096     return Res;
   3097   }
   3098 
   3099   // Bitcast operand 1 to i32.
   3100   if (SrcVT == MVT::f64)
   3101     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3102                        &Tmp1, 1).getValue(1);
   3103   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
   3104 
   3105   // Or in the signbit with integer operations.
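          // I.e. copysign(x, y) = bitcast((bits(x) & 0x7fffffff) |
          //                               (bits(y) & 0x80000000)) for f32;
          // the f64 path below applies the same trick to the high word only.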
   3106   SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
   3107   SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
   3108   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
   3109   if (VT == MVT::f32) {
   3110     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
   3111                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
   3112     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
   3113                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   3114   }
   3115 
   3116   // f64: Or the high part with signbit and then combine two parts.
   3117   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3118                      &Tmp0, 1);
   3119   SDValue Lo = Tmp0.getValue(0);
   3120   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
   3121   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
   3122   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   3123 }
   3124 
   3125 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   3126   MachineFunction &MF = DAG.getMachineFunction();
   3127   MachineFrameInfo *MFI = MF.getFrameInfo();
   3128   MFI->setReturnAddressIsTaken(true);
   3129 
   3130   EVT VT = Op.getValueType();
   3131   DebugLoc dl = Op.getDebugLoc();
   3132   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3133   if (Depth) {
   3134     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
   3135     SDValue Offset = DAG.getConstant(4, MVT::i32);
   3136     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
   3137                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
   3138                        MachinePointerInfo(), false, false, 0);
   3139   }
   3140 
   3141   // Return LR, which contains the return address. Mark it an implicit live-in.
   3142   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   3143   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   3144 }
   3145 
   3146 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   3147   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   3148   MFI->setFrameAddressIsTaken(true);
   3149 
   3150   EVT VT = Op.getValueType();
   3151   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   3152   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3153   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
   3154     ? ARM::R7 : ARM::R11;
   3155   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   3156   while (Depth--)
   3157     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
   3158                             MachinePointerInfo(),
   3159                             false, false, 0);
   3160   return FrameAddr;
   3161 }
   3162 
   3163 /// ExpandBITCAST - If the target supports VFP, this function is called to
   3164 /// expand a bit convert where either the source or destination type is i64 to
   3165 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
   3166 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
   3167 /// vectors), since the legalizer won't know what to do with that.
   3168 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
   3169   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   3170   DebugLoc dl = N->getDebugLoc();
   3171   SDValue Op = N->getOperand(0);
   3172 
   3173   // This function is only supposed to be called for i64 types, either as the
   3174   // source or destination of the bit convert.
   3175   EVT SrcVT = Op.getValueType();
   3176   EVT DstVT = N->getValueType(0);
   3177   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
   3178          "ExpandBITCAST called for non-i64 type");
   3179 
   3180   // Turn i64->f64 into VMOVDRR.
   3181   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
   3182     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3183                              DAG.getConstant(0, MVT::i32));
   3184     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3185                              DAG.getConstant(1, MVT::i32));
   3186     return DAG.getNode(ISD::BITCAST, dl, DstVT,
   3187                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
   3188   }
   3189 
   3190   // Turn f64->i64 into VMOVRRD.
   3191   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
   3192     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
   3193                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
   3194     // Merge the pieces into a single i64 value.
   3195     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
   3196   }
   3197 
   3198   return SDValue();
   3199 }
   3200 
   3201 /// getZeroVector - Returns a vector of specified type with all zero elements.
   3202 /// Zero vectors are used to represent vector negation and in those cases
   3203 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
   3204 /// not support i64 elements, so sometimes the zero vectors will need to be
   3205 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
   3206 /// zero vector.
   3207 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   3208   assert(VT.isVector() && "Expected a vector type");
   3209   // The canonical modified immediate encoding of a zero vector is....0!
   3210   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
   3211   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
   3212   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
   3213   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   3214 }
   3215 
   3216 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
   3217 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
   3218 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   3219                                                 SelectionDAG &DAG) const {
   3220   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3221   EVT VT = Op.getValueType();
   3222   unsigned VTBits = VT.getSizeInBits();
   3223   DebugLoc dl = Op.getDebugLoc();
   3224   SDValue ShOpLo = Op.getOperand(0);
   3225   SDValue ShOpHi = Op.getOperand(1);
   3226   SDValue ShAmt  = Op.getOperand(2);
   3227   SDValue ARMcc;
   3228   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
   3229 
   3230   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
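          // For ShAmt < VTBits the low result is
          // (Lo >> ShAmt) | (Hi << (VTBits - ShAmt)); for ShAmt >= VTBits it
          // is Hi >> (ShAmt - VTBits).  The CMOV below selects between the
          // two based on the sign of ExtraShAmt = ShAmt - VTBits.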
   3231 
   3232   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3233                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3234   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
   3235   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3236                                    DAG.getConstant(VTBits, MVT::i32));
   3237   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
   3238   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3239   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
   3240 
   3241   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3242   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3243                           ARMcc, DAG, dl);
   3244   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
   3245   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
   3246                            CCR, Cmp);
   3247 
   3248   SDValue Ops[2] = { Lo, Hi };
   3249   return DAG.getMergeValues(Ops, 2, dl);
   3250 }
   3251 
   3252 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
   3253 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
   3254 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   3255                                                SelectionDAG &DAG) const {
   3256   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3257   EVT VT = Op.getValueType();
   3258   unsigned VTBits = VT.getSizeInBits();
   3259   DebugLoc dl = Op.getDebugLoc();
   3260   SDValue ShOpLo = Op.getOperand(0);
   3261   SDValue ShOpHi = Op.getOperand(1);
   3262   SDValue ShAmt  = Op.getOperand(2);
   3263   SDValue ARMcc;
   3264 
   3265   assert(Op.getOpcode() == ISD::SHL_PARTS);
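          // Mirror image of the right-shift case: for ShAmt < VTBits the high
          // result is (Hi << ShAmt) | (Lo >> (VTBits - ShAmt)); for
          // ShAmt >= VTBits it is Lo << (ShAmt - VTBits).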
   3266   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3267                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3268   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
   3269   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3270                                    DAG.getConstant(VTBits, MVT::i32));
   3271   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
   3272   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
   3273 
   3274   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3275   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3276   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3277                           ARMcc, DAG, dl);
   3278   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
   3279   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
   3280                            CCR, Cmp);
   3281 
   3282   SDValue Ops[2] = { Lo, Hi };
   3283   return DAG.getMergeValues(Ops, 2, dl);
   3284 }
   3285 
   3286 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   3287                                             SelectionDAG &DAG) const {
   3288   // The rounding mode is in bits 23:22 of the FPSCR.
   3289   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
   3290   // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
   3291   // so that the shift + and get folded into a bitfield extract.
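          // E.g. FPSCR bits 23:22 == 3 (round toward zero): adding 1 << 22
          // wraps the two-bit field to 0, FLT_ROUNDS' value for "toward zero".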
   3292   DebugLoc dl = Op.getDebugLoc();
   3293   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
   3294                               DAG.getConstant(Intrinsic::arm_get_fpscr,
   3295                                               MVT::i32));
   3296   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
   3297                                   DAG.getConstant(1U << 22, MVT::i32));
   3298   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
   3299                               DAG.getConstant(22, MVT::i32));
   3300   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
   3301                      DAG.getConstant(3, MVT::i32));
   3302 }
   3303 
   3304 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
   3305                          const ARMSubtarget *ST) {
   3306   EVT VT = N->getValueType(0);
   3307   DebugLoc dl = N->getDebugLoc();
   3308 
   3309   if (!ST->hasV6T2Ops())
   3310     return SDValue();
   3311 
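          // CTTZ is lowered as CTLZ(RBIT(x)): reversing the bits moves the
          // least significant set bit to the top, e.g.
          // cttz(0x8) == ctlz(0x10000000) == 3.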
   3312   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
   3313   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
   3314 }
   3315 
   3316 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
   3317                           const ARMSubtarget *ST) {
   3318   EVT VT = N->getValueType(0);
   3319   DebugLoc dl = N->getDebugLoc();
   3320 
   3321   if (!VT.isVector())
   3322     return SDValue();
   3323 
   3324   // Lower vector shifts on NEON to use VSHL.
   3325   assert(ST->hasNEON() && "unexpected vector shift");
   3326 
   3327   // Left shifts translate directly to the vshiftu intrinsic.
   3328   if (N->getOpcode() == ISD::SHL)
   3329     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3330                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
   3331                        N->getOperand(0), N->getOperand(1));
   3332 
   3333   assert((N->getOpcode() == ISD::SRA ||
   3334           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
   3335 
   3336   // NEON uses the same intrinsics for both left and right shifts.  For
   3337   // right shifts, the shift amounts are negative, so negate the vector of
   3338   // shift amounts.
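          // E.g. a right shift by 3 in every lane becomes vshifts/vshiftu
          // with a per-lane shift amount of -3.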
   3339   EVT ShiftVT = N->getOperand(1).getValueType();
   3340   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
   3341                                      getZeroVector(ShiftVT, DAG, dl),
   3342                                      N->getOperand(1));
   3343   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
   3344                              Intrinsic::arm_neon_vshifts :
   3345                              Intrinsic::arm_neon_vshiftu);
   3346   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3347                      DAG.getConstant(vshiftInt, MVT::i32),
   3348                      N->getOperand(0), NegatedCount);
   3349 }
   3350 
static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
                                const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // We can get here for a node like i32 = ISD::SHL i32, i64.
  if (VT != MVT::i64)
    return SDValue();

  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "Unknown shift to lower!");

  // We only lower SRA and SRL by 1 here; all others use generic lowering.
  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
    return SDValue();

  // If we are in thumb mode, we don't have RRX.
  if (ST->isThumb1Only()) return SDValue();

  // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(1, MVT::i32));

  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one
  // and captures the shifted-out bit in the carry flag.
  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);

  // The low part is an ARMISD::RRX operand, which shifts the carry in.
  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

  // Merge the pieces into a single i64 value.
  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
}

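// Sketch of the expected selection for the code above (added commentary;
// the register assignments are illustrative assumptions):
//
//   %r = lshr i64 %x, 1      ; %x held in r1:r0 (hi:lo)
//     --> lsrs r1, r1, #1    @ SRL_FLAG: shift hi, bit 0 lands in carry
//         rrx  r0, r0        @ RRX: carry rotates into bit 31 of lo
//
// A one-bit shift is the only amount RRX can absorb, which is why larger
// constant shifts fall back to the generic i64 expansion.
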
static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
  SDValue TmpOp0, TmpOp1;
  bool Invert = false;
  bool Swap = false;
  unsigned Opc = 0;

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue CC = Op.getOperand(2);
  EVT VT = Op.getValueType();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  DebugLoc dl = Op.getDebugLoc();

  if (Op1.getValueType().isFloatingPoint()) {
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal FP comparison"); break;
    case ISD::SETUNE:
    case ISD::SETNE:  Invert = true; // Fallthrough
    case ISD::SETOEQ:
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETOLT:
    case ISD::SETLT:  Swap = true; // Fallthrough
    case ISD::SETOGT:
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETOLE:
    case ISD::SETLE:  Swap = true; // Fallthrough
    case ISD::SETOGE:
    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
    case ISD::SETUGE: Swap = true; // Fallthrough
    case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
    case ISD::SETUGT: Swap = true; // Fallthrough
    case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
    case ISD::SETUEQ: Invert = true; // Fallthrough
    case ISD::SETONE:
      // Expand this to (OLT | OGT).
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
      break;
    case ISD::SETUO: Invert = true; // Fallthrough
    case ISD::SETO:
      // Expand this to (OLT | OGE).
      TmpOp0 = Op0;
      TmpOp1 = Op1;
      Opc = ISD::OR;
      Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
      Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
      break;
    }
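    // Added note (not in the original source): NEON has no unordered FP
    // compares, so the unordered predicates above are built from ordered
    // opcodes plus a lane-wise inversion, e.g. for SETULT:
    //
    //   ult(a, b) == !oge(a, b)  -->  vmvn(vcge(a, b))
    //
    // The inversion is sound because each lane of a NEON compare result is
    // either all-ones or all-zeros.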
  } else {
    // Integer comparisons.
    switch (SetCCOpcode) {
    default: llvm_unreachable("Illegal integer comparison"); break;
    case ISD::SETNE:  Invert = true; // Fallthrough
    case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    case ISD::SETLT:  Swap = true; // Fallthrough
    case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    case ISD::SETLE:  Swap = true; // Fallthrough
    case ISD::SETGE:  Opc = ARMISD::VCGE; break;
    case ISD::SETULT: Swap = true; // Fallthrough
    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
    case ISD::SETULE: Swap = true; // Fallthrough
    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
    }

    // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
    if (Opc == ARMISD::VCEQ) {
      SDValue AndOp;
      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
        AndOp = Op0;
      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
        AndOp = Op1;

      // Ignore any bitcast wrapping the AND operand.
      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
        AndOp = AndOp.getOperand(0);

      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
        Opc = ARMISD::VTST;
        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
        Invert = !Invert;
      }
    }
  }
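  // Illustration of the VTST fold above (added commentary; the IR is an
  // assumed example):
  //
  //   %t = and <4 x i32> %a, %b
  //   %c = icmp ne <4 x i32> %t, zeroinitializer
  //
  // matches the VCEQ-against-zero pattern and is rewritten to a single
  // (ARMISD::VTST %a, %b). VTST already yields all-ones lanes exactly where
  // the AND is nonzero, i.e. the "ne" sense, so Invert is toggled: SETNE
  // then needs no VMVN, while SETEQ gains one.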

  if (Swap)
    std::swap(Op0, Op1);

  // If one of the operands is a constant vector zero, attempt to fold the
  // comparison to a specialized compare-against-zero form.
  SDValue SingleOp;
  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
    SingleOp = Op0;
  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
    if (Opc == ARMISD::VCGE)
      Opc = ARMISD::VCLEZ;
    else if (Opc == ARMISD::VCGT)
      Opc = ARMISD::VCLTZ;
    SingleOp = Op1;
  }
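  // Added note: when the zero vector is on the left-hand side, the signed
  // compares are flipped to their against-zero duals (0 >= x  <=>  x <= 0,
  // 0 > x  <=>  x < 0), hence the VCLEZ/VCLTZ rewrites above. Unsigned
  // opcodes are deliberately left untouched and fall through to the generic
  // two-operand form below.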

  SDValue Result;
  if (SingleOp.getNode()) {
    switch (Opc) {
    case ARMISD::VCEQ:
      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
    case ARMISD::VCGE:
      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
    case ARMISD::VCLEZ:
      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
    case ARMISD::VCGT:
      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
    case ARMISD::VCLTZ:
      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
    default:
      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);