      1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the interfaces that ARM uses to lower LLVM code into a
     11 // selection DAG.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "arm-isel"
     16 #include "ARMISelLowering.h"
     17 #include "ARM.h"
     18 #include "ARMCallingConv.h"
     19 #include "ARMConstantPoolValue.h"
     20 #include "ARMMachineFunctionInfo.h"
     21 #include "ARMPerfectShuffle.h"
     22 #include "ARMSubtarget.h"
     23 #include "ARMTargetMachine.h"
     24 #include "ARMTargetObjectFile.h"
     25 #include "MCTargetDesc/ARMAddressingModes.h"
     26 #include "llvm/ADT/Statistic.h"
     27 #include "llvm/ADT/StringExtras.h"
     28 #include "llvm/CodeGen/CallingConvLower.h"
     29 #include "llvm/CodeGen/IntrinsicLowering.h"
     30 #include "llvm/CodeGen/MachineBasicBlock.h"
     31 #include "llvm/CodeGen/MachineFrameInfo.h"
     32 #include "llvm/CodeGen/MachineFunction.h"
     33 #include "llvm/CodeGen/MachineInstrBuilder.h"
     34 #include "llvm/CodeGen/MachineModuleInfo.h"
     35 #include "llvm/CodeGen/MachineRegisterInfo.h"
     36 #include "llvm/CodeGen/SelectionDAG.h"
     37 #include "llvm/IR/CallingConv.h"
     38 #include "llvm/IR/Constants.h"
     39 #include "llvm/IR/Function.h"
     40 #include "llvm/IR/GlobalValue.h"
     41 #include "llvm/IR/Instruction.h"
     42 #include "llvm/IR/Instructions.h"
     43 #include "llvm/IR/Intrinsics.h"
     44 #include "llvm/IR/Type.h"
     45 #include "llvm/MC/MCSectionMachO.h"
     46 #include "llvm/Support/CommandLine.h"
     47 #include "llvm/Support/ErrorHandling.h"
     48 #include "llvm/Support/MathExtras.h"
     49 #include "llvm/Support/raw_ostream.h"
     50 #include "llvm/Target/TargetOptions.h"
     51 using namespace llvm;
     52 
     53 STATISTIC(NumTailCalls, "Number of tail calls");
     54 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
     55 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
     56 
     57 // This option should go away when tail calls fully work.
     58 static cl::opt<bool>
     59 EnableARMTailCalls("arm-tail-calls", cl::Hidden,
     60   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
     61   cl::init(false));
     62 
     63 cl::opt<bool>
     64 EnableARMLongCalls("arm-long-calls", cl::Hidden,
     65   cl::desc("Generate calls via indirect call instructions"),
     66   cl::init(false));
     67 
     68 static cl::opt<bool>
     69 ARMInterworking("arm-interworking", cl::Hidden,
     70   cl::desc("Enable / disable ARM interworking (for debugging only)"),
     71   cl::init(true));
     72 
     73 namespace {
     74   class ARMCCState : public CCState {
     75   public:
     76     ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
     77                const TargetMachine &TM, SmallVectorImpl<CCValAssign> &locs,
     78                LLVMContext &C, ParmContext PC)
     79         : CCState(CC, isVarArg, MF, TM, locs, C) {
     80       assert(((PC == Call) || (PC == Prologue)) &&
     81              "ARMCCState users must specify whether their context is call "
     82              "or prologue generation.");
     83       CallOrPrologue = PC;
     84     }
     85   };
     86 }
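        // A sketch of typical use: when lowering a call, the ABI analysis is
        // driven through this state object, e.g. roughly
        //   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
        //                     getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
        //   CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(...));
        // so the calling-convention code can tell call sites and prologues apart.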
     87 
     88 // The APCS parameter registers.
     89 static const uint16_t GPRArgRegs[] = {
     90   ARM::R0, ARM::R1, ARM::R2, ARM::R3
     91 };
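        // For example, in a call f(a, b, c, d, e) the first four integer
        // arguments travel in R0-R3 and the fifth is passed on the stack; the
        // byval/vararg lowering code uses this array to model that assignment.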
     92 
     93 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
     94                                        MVT PromotedBitwiseVT) {
     95   if (VT != PromotedLdStVT) {
     96     setOperationAction(ISD::LOAD, VT, Promote);
     97     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
     98 
     99     setOperationAction(ISD::STORE, VT, Promote);
    100     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
    101   }
    102 
    103   MVT ElemTy = VT.getVectorElementType();
    104   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    105     setOperationAction(ISD::SETCC, VT, Custom);
    106   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    107   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    108   if (ElemTy == MVT::i32) {
    109     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    110     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    111     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    112     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
    113   } else {
    114     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    115     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    116     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    117     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    118   }
    119   setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
    120   setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
    121   setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
    122   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
    123   setOperationAction(ISD::SELECT,            VT, Expand);
    124   setOperationAction(ISD::SELECT_CC,         VT, Expand);
    125   setOperationAction(ISD::VSELECT,           VT, Expand);
    126   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    127   if (VT.isInteger()) {
    128     setOperationAction(ISD::SHL, VT, Custom);
    129     setOperationAction(ISD::SRA, VT, Custom);
    130     setOperationAction(ISD::SRL, VT, Custom);
    131   }
    132 
    133   // Promote all bit-wise operations.
    134   if (VT.isInteger() && VT != PromotedBitwiseVT) {
    135     setOperationAction(ISD::AND, VT, Promote);
    136     AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    137     setOperationAction(ISD::OR,  VT, Promote);
    138     AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    139     setOperationAction(ISD::XOR, VT, Promote);
    140     AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
    141   }
    142 
    143   // Neon does not support vector divide/remainder operations.
    144   setOperationAction(ISD::SDIV, VT, Expand);
    145   setOperationAction(ISD::UDIV, VT, Expand);
    146   setOperationAction(ISD::FDIV, VT, Expand);
    147   setOperationAction(ISD::SREM, VT, Expand);
    148   setOperationAction(ISD::UREM, VT, Expand);
    149   setOperationAction(ISD::FREM, VT, Expand);
    150 }
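        // As a rough example of the promotion above: for a 64-bit NEON type such
        // as v8i8 with PromotedLdStVT = f64, loads and stores are rewritten to
        // operate on f64, so a single D-register load/store covers the whole
        // vector.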
    151 
    152 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
    153   addRegisterClass(VT, &ARM::DPRRegClass);
    154   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
    155 }
    156 
    157 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
    158   addRegisterClass(VT, &ARM::QPRRegClass);
    159   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
    160 }
    161 
    162 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
    163   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    164     return new TargetLoweringObjectFileMachO();
    165 
    166   return new ARMElfTargetObjectFile();
    167 }
    168 
    169 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    170     : TargetLowering(TM, createTLOF(TM)) {
    171   Subtarget = &TM.getSubtarget<ARMSubtarget>();
    172   RegInfo = TM.getRegisterInfo();
    173   Itins = TM.getInstrItineraryData();
    174 
    175   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
    176 
    177   if (Subtarget->isTargetDarwin()) {
    178     // Uses VFP for Thumb libfuncs if available.
    179     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
    180       // Single-precision floating-point arithmetic.
    181       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
    182       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
    183       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
    184       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
    185 
    186       // Double-precision floating-point arithmetic.
    187       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
    188       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
    189       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
    190       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
    191 
    192       // Single-precision comparisons.
    193       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
    194       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
    195       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
    196       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
    197       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
    198       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
    199       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
    200       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
    201 
    202       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    203       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
    204       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    205       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    206       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    207       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    208       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    209       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    210 
    211       // Double-precision comparisons.
    212       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
    213       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
    214       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
    215       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
    216       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
    217       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
    218       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
    219       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
    220 
    221       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    222       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
    223       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    224       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    225       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    226       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    227       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    228       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    229 
    230       // Floating-point to integer conversions.
    231       // i64 conversions are done via library routines even when generating VFP
    232       // instructions, so use the same ones.
    233       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
    234       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
    235       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
    236       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
    237 
    238       // Conversions between floating types.
    239       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
    240       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
    241 
    242       // Integer to floating-point conversions.
    243       // i64 conversions are done via library routines even when generating VFP
    244       // instructions, so use the same ones.
    245       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
    246       // e.g., __floatunsidf vs. __floatunssidfvfp.
    247       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
    248       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
    249       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
    250       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    251     }
    252   }
    253 
    254   // These 128-bit shift libcalls are not available on 32-bit targets.
    255   setLibcallName(RTLIB::SHL_I128, 0);
    256   setLibcallName(RTLIB::SRL_I128, 0);
    257   setLibcallName(RTLIB::SRA_I128, 0);
    258 
    259   if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    260     // Double-precision floating-point arithmetic helper functions
    261     // RTABI chapter 4.1.2, Table 2
    262     setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    263     setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    264     setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    265     setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    266     setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    267     setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    268     setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    269     setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
    270 
    271     // Double-precision floating-point comparison helper functions
    272     // RTABI chapter 4.1.2, Table 3
    273     setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    274     setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    275     setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    276     setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    277     setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    278     setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    279     setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    280     setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    281     setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    282     setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    283     setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    284     setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    285     setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    286     setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    287     setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    288     setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    289     setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    290     setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    291     setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    292     setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    293     setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    294     setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    295     setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    296     setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
    297 
    298     // Single-precision floating-point arithmetic helper functions
    299     // RTABI chapter 4.1.2, Table 4
    300     setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    301     setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    302     setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    303     setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    304     setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    305     setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    306     setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    307     setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
    308 
    309     // Single-precision floating-point comparison helper functions
    310     // RTABI chapter 4.1.2, Table 5
    311     setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    312     setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    313     setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    314     setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    315     setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    316     setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    317     setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    318     setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    319     setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    320     setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    321     setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    322     setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    323     setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    324     setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    325     setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    326     setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    327     setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    328     setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    329     setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    330     setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    331     setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    332     setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    333     setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    334     setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
    335 
    336     // Floating-point to integer conversions.
    337     // RTABI chapter 4.1.2, Table 6
    338     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    339     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    340     setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    341     setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    342     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    343     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    344     setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    345     setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    346     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    347     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    348     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    349     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    350     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    351     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    352     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    353     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
    354 
    355     // Conversions between floating types.
    356     // RTABI chapter 4.1.2, Table 7
    357     setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    358     setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    359     setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    360     setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
    361 
    362     // Integer to floating-point conversions.
    363     // RTABI chapter 4.1.2, Table 8
    364     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    365     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    366     setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    367     setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    368     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    369     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    370     setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    371     setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    372     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    373     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    374     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    375     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    376     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    377     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    378     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    379     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    380 
    381     // Long long helper functions
    382     // RTABI chapter 4.2, Table 9
    383     setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    384     setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    385     setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    386     setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    387     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    388     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    389     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    390     setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    391     setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    392     setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
    393 
    394     // Integer division functions
    395     // RTABI chapter 4.3.1
    396     setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    397     setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    398     setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    399     setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    400     setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    401     setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    402     setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    403     setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    404     setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    405     setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    406     setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    407     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    408     setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    409     setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    410     setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    411     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    412 
    413     // Memory operations
    414     // RTABI chapter 4.3.4
    415     setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    416     setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    417     setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    418     setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    419     setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    420     setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
    421   }
    422 
    423   // Use divmod compiler-rt calls for iOS 5.0 and later.
    424   if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
    425       !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    426     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    427     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
    428   }
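          // For reference, the compiler-rt divmod helpers have roughly the C
          // signature
          //   int __divmodsi4(int a, int b, int *rem);
          // returning the quotient and storing the remainder through 'rem', so a
          // single call covers a div + rem pair.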
    429 
    430   if (Subtarget->isThumb1Only())
    431     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
    432   else
    433     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
    434   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    435       !Subtarget->isThumb1Only()) {
    436     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    437     if (!Subtarget->isFPOnlySP())
    438       addRegisterClass(MVT::f64, &ARM::DPRRegClass);
    439 
    440     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    441   }
    442 
    443   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    444        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    445     for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    446          InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
    447       setTruncStoreAction((MVT::SimpleValueType)VT,
    448                           (MVT::SimpleValueType)InnerVT, Expand);
    449     setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    450     setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    451     setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
    452   }
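          // In effect, every vector truncating store and extending load starts out
          // Expand here; the NEON block below re-marks a handful (e.g. extloads
          // from v4i8) as Legal where the hardware can actually do them.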
    453 
    454   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
    455 
    456   if (Subtarget->hasNEON()) {
    457     addDRTypeForNEON(MVT::v2f32);
    458     addDRTypeForNEON(MVT::v8i8);
    459     addDRTypeForNEON(MVT::v4i16);
    460     addDRTypeForNEON(MVT::v2i32);
    461     addDRTypeForNEON(MVT::v1i64);
    462 
    463     addQRTypeForNEON(MVT::v4f32);
    464     addQRTypeForNEON(MVT::v2f64);
    465     addQRTypeForNEON(MVT::v16i8);
    466     addQRTypeForNEON(MVT::v8i16);
    467     addQRTypeForNEON(MVT::v4i32);
    468     addQRTypeForNEON(MVT::v2i64);
    469 
    470     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    471     // neither NEON nor VFP supports any arithmetic operations on it.
    472     // The same goes for v4f32, although vadd, vsub, and vmul are natively
    473     // supported for v4f32.
    474     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    475     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    476     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    477     // FIXME: Code duplication: FDIV and FREM are always expanded, see
    478     // ARMTargetLowering::addTypeForNEON method for details.
    479     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    480     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    481     // FIXME: Create unittest.
    482     // In other words, find a case where "copysign" appears in the DAG with
    483     // vector operands.
    484     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    485     // FIXME: Code duplication: SETCC has custom operation action, see
    486     // ARMTargetLowering::addTypeForNEON method for details.
    487     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    488     // FIXME: Create unittest for FNEG and for FABS.
    489     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    490     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    491     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    492     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    493     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    494     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    495     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    496     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    497     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    498     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    499     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    500     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    501     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    502     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    503     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    504     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    505     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    506     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    507     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
    508 
    509     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    510     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    511     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    512     setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    513     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    514     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    515     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    516     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    517     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    518     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    519     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    520     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    521     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    522     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    523     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
    524 
    525     // Mark the v2f32 floating-point intrinsics as Expand.
    526     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    527     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    528     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    529     setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    530     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    531     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    532     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    533     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    534     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    535     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    536     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    537     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    538     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    539     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    540     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
    541 
    542     // Neon does not support some operations on v1i64 and v2i64 types.
    543     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    544     // Custom handling for some quad-vector types to detect VMULL.
    545     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    546     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    547     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    548     // Custom handling for some vector types to avoid expensive expansions
    549     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    550     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    551     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    552     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    553     setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    554     setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    555     // NEON does not have single-instruction SINT_TO_FP and UINT_TO_FP with
    556     // a destination type that is wider than the source, nor does it have
    557     // a FP_TO_[SU]INT instruction with a destination narrower than the
    558     // source.
    559     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    560     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    561     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    562     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
    563 
    564     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
    565     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
    566 
    567     // Custom expand long extensions to vectors.
    568     setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32,  Custom);
    569     setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32,  Custom);
    570     setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64,  Custom);
    571     setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64,  Custom);
    572     setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
    573     setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
    574     setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64,  Custom);
    575     setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64,  Custom);
    576 
    577     // NEON does not have a single-instruction CTPOP for vectors with element
    578     // types wider than 8 bits.  However, custom lowering can leverage the
    579     // v8i8/v16i8 vcnt instruction.
    580     setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
    581     setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
    582     setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
    583     setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
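            // Sketch of that lowering: a v8i16 popcount can be built from a v16i8
            // VCNT followed by VPADDL to pairwise-accumulate the byte counts into
            // halfword lanes (with a further VPADDL step for 32-bit lanes).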
    584 
    585     // NEON only has FMA instructions as of VFP4.
    586     if (!Subtarget->hasVFP4()) {
    587       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
    588       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    589     }
    590 
    591     setTargetDAGCombine(ISD::INTRINSIC_VOID);
    592     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    593     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    594     setTargetDAGCombine(ISD::SHL);
    595     setTargetDAGCombine(ISD::SRL);
    596     setTargetDAGCombine(ISD::SRA);
    597     setTargetDAGCombine(ISD::SIGN_EXTEND);
    598     setTargetDAGCombine(ISD::ZERO_EXTEND);
    599     setTargetDAGCombine(ISD::ANY_EXTEND);
    600     setTargetDAGCombine(ISD::SELECT_CC);
    601     setTargetDAGCombine(ISD::BUILD_VECTOR);
    602     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    603     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    604     setTargetDAGCombine(ISD::STORE);
    605     setTargetDAGCombine(ISD::FP_TO_SINT);
    606     setTargetDAGCombine(ISD::FP_TO_UINT);
    607     setTargetDAGCombine(ISD::FDIV);
    608 
    609     // It is legal to extload from v4i8 to v4i16 or v4i32.
    610     MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
    611                   MVT::v4i16, MVT::v2i16,
    612                   MVT::v2i32};
    613     for (unsigned i = 0; i < 6; ++i) {
    614       setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
    615       setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
    616       setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    617     }
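            // e.g. an extending load from v4i8 to v4i32 can then be selected as a
            // narrow vld1 followed by vmovl widening steps instead of being
            // scalarized.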
    618   }
    619 
    620   // ARM and Thumb2 support UMLAL/SMLAL.
    621   if (!Subtarget->isThumb1Only())
    622     setTargetDAGCombine(ISD::ADDC);
    623 
    624 
    625   computeRegisterProperties();
    626 
    627   // ARM does not have f32 extending load.
    628   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
    629 
    630   // ARM does not have i1 sign extending load.
    631   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
    632 
    633   // ARM supports all 4 flavors of integer indexed load / store.
    634   if (!Subtarget->isThumb1Only()) {
    635     for (unsigned im = (unsigned)ISD::PRE_INC;
    636          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
    637       setIndexedLoadAction(im,  MVT::i1,  Legal);
    638       setIndexedLoadAction(im,  MVT::i8,  Legal);
    639       setIndexedLoadAction(im,  MVT::i16, Legal);
    640       setIndexedLoadAction(im,  MVT::i32, Legal);
    641       setIndexedStoreAction(im, MVT::i1,  Legal);
    642       setIndexedStoreAction(im, MVT::i8,  Legal);
    643       setIndexedStoreAction(im, MVT::i16, Legal);
    644       setIndexedStoreAction(im, MVT::i32, Legal);
    645     }
    646   }
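          // The four flavors correspond to ARM's writeback addressing modes, e.g.
          //   ldr r0, [r1, #4]!   ; pre-indexed  (base updated before the access)
          //   ldr r0, [r1], #4    ; post-indexed (base updated after the access)
          // and their decrementing counterparts.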
    647 
    648   // i64 operation support.
    649   setOperationAction(ISD::MUL,     MVT::i64, Expand);
    650   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
    651   if (Subtarget->isThumb1Only()) {
    652     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    653     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    654   }
    655   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
    656       || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    657     setOperationAction(ISD::MULHS, MVT::i32, Expand);
    658 
    659   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    660   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    661   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
    662   setOperationAction(ISD::SRL,       MVT::i64, Custom);
    663   setOperationAction(ISD::SRA,       MVT::i64, Custom);
    664 
    665   if (!Subtarget->isThumb1Only()) {
    666     // FIXME: We should do this for Thumb1 as well.
    667     setOperationAction(ISD::ADDC,    MVT::i32, Custom);
    668     setOperationAction(ISD::ADDE,    MVT::i32, Custom);
    669     setOperationAction(ISD::SUBC,    MVT::i32, Custom);
    670     setOperationAction(ISD::SUBE,    MVT::i32, Custom);
    671   }
    672 
    673   // ARM does not have ROTL.
    674   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
    675   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
    676   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    677   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    678     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
    679 
    680   // These just redirect to CTTZ and CTLZ on ARM.
    681   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
    682   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
    683 
    684   setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
    685 
    686   // BSWAP (REV) is only available on ARMv6 and later.
    687   if (!Subtarget->hasV6Ops())
    688     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
    689 
    690   if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
    691       !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    692     // These are expanded into libcalls if the CPU doesn't have a HW divider.
    693     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
    694     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
    695   }
    696 
    697   // FIXME: Also set divmod for SREM on EABI
    698   setOperationAction(ISD::SREM,  MVT::i32, Expand);
    699   setOperationAction(ISD::UREM,  MVT::i32, Expand);
    700   // Register based DivRem for AEABI (RTABI 4.2)
    701   if (Subtarget->isTargetAEABI()) {
    702     setLibcallName(RTLIB::SDIVREM_I8,  "__aeabi_idivmod");
    703     setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
    704     setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
    705     setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
    706     setLibcallName(RTLIB::UDIVREM_I8,  "__aeabi_uidivmod");
    707     setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
    708     setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
    709     setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
    710 
    711     setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
    712     setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
    713     setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS);
    714     setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS);
    715     setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS);
    716     setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS);
    717     setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS);
    718     setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS);
    719 
    720     setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
    721     setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
    722   } else {
    723     setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    724     setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    725   }
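          // Per the RTABI, __aeabi_idivmod and __aeabi_uidivmod return both results
          // at once: the quotient in r0 and the remainder in r1, so one combined
          // SDIVREM/UDIVREM node maps onto a single helper call.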
    726 
    727   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
    728   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
    729   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
    730   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
    731   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
    732 
    733   setOperationAction(ISD::TRAP, MVT::Other, Legal);
    734 
    735   // Use the default implementation.
    736   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
    737   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
    738   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
    739   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
    740   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
    741   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
    742 
    743   if (!Subtarget->isTargetDarwin()) {
    744     // Non-Darwin platforms may return values in these registers via the
    745     // personality function.
    746     setExceptionPointerRegister(ARM::R0);
    747     setExceptionSelectorRegister(ARM::R1);
    748   }
    749 
    750   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
    751   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
    752   // the default expansion.
    753   // FIXME: This should be checking for v6k, not just v6.
    754   if (Subtarget->hasDataBarrier() ||
    755       (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    756     // membarrier needs custom lowering; the rest are legal and handled
    757     // normally.
    758     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    759     // Custom lowering for 64-bit ops
    760     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
    761     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
    762     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
    763     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
    764     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
    765     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Custom);
    766     setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i64, Custom);
    767     setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i64, Custom);
    768     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
    769     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
    770     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
    771     // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    772     setInsertFencesForAtomic(true);
    773   } else {
    774     // Set them all for expansion, which will force libcalls.
    775     setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
    776     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    777     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    778     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    779     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    780     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    781     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    782     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    783     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    784     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    785     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    786     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    787     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    788     // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    789     // Unordered/Monotonic case.
    790     setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    791     setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    792   }
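          // When these atomics are expanded, the legalizer typically lowers them to
          // __sync_* runtime calls (e.g. __sync_fetch_and_add_4 for a 32-bit
          // ATOMIC_LOAD_ADD), which the platform's runtime is expected to provide.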
    793 
    794   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
    795 
    796   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
    797   if (!Subtarget->hasV6Ops()) {
    798     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    799     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
    800   }
    801   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
    802 
    803   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    804       !Subtarget->isThumb1Only()) {
    805     // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
    806     // iff the target supports VFP2.
    807     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    808     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
    809   }
    810 
    811   // We want to custom lower some of our intrinsics.
    812   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
    813   if (Subtarget->isTargetDarwin()) {
    814     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    815     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    816     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
    817   }
    818 
    819   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
    820   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
    821   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
    822   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
    823   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
    824   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
    825   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
    826   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    827   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
    828 
    829   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
    830   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
    831   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
    832   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
    833   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
    834 
    835   // We don't support sin/cos/fmod/copysign/pow
    836   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
    837   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
    838   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
    839   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
    840   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
    841   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
    842   setOperationAction(ISD::FREM,      MVT::f64, Expand);
    843   setOperationAction(ISD::FREM,      MVT::f32, Expand);
    844   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    845       !Subtarget->isThumb1Only()) {
    846     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    847     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
    848   }
    849   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
    850   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
    851 
    852   if (!Subtarget->hasVFP4()) {
    853     setOperationAction(ISD::FMA, MVT::f64, Expand);
    854     setOperationAction(ISD::FMA, MVT::f32, Expand);
    855   }
    856 
    857   // Various VFP goodness
    858   if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    859     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    860     if (Subtarget->hasVFP2()) {
    861       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    862       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    863       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    864       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    865     }
    866     // Special handling for half-precision FP.
    867     if (!Subtarget->hasFP16()) {
    868       setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
    869       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    870     }
    871   }
    872 
    873   // We have target-specific dag combine patterns for the following nodes:
    874   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
    875   setTargetDAGCombine(ISD::ADD);
    876   setTargetDAGCombine(ISD::SUB);
    877   setTargetDAGCombine(ISD::MUL);
    878   setTargetDAGCombine(ISD::AND);
    879   setTargetDAGCombine(ISD::OR);
    880   setTargetDAGCombine(ISD::XOR);
    881 
    882   if (Subtarget->hasV6Ops())
    883     setTargetDAGCombine(ISD::SRL);
    884 
    885   setStackPointerRegisterToSaveRestore(ARM::SP);
    886 
    887   if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
    888       !Subtarget->hasVFP2())
    889     setSchedulingPreference(Sched::RegPressure);
    890   else
    891     setSchedulingPreference(Sched::Hybrid);
    892 
    893   //// temporary - rewrite interface to use type
    894   MaxStoresPerMemset = 8;
    895   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
    896   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
    897   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
    898   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
    899   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
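          // Rough effect of these limits: a small @llvm.memcpy (up to four stores'
          // worth of data, e.g. 16 bytes as four i32 stores) is inlined; anything
          // larger falls back to a memcpy call (__aeabi_memcpy under AAPCS).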
    900 
    901   // On ARM, arguments smaller than 4 bytes are extended, so all arguments
    902   // are at least 4-byte aligned.
    903   setMinStackArgumentAlignment(4);
    904 
    905   // Prefer likely predicted branches to selects on out-of-order cores.
    906   PredictableSelectIsExpensive = Subtarget->isLikeA9();
    907 
    908   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
    909 }
    910 
    911 // FIXME: It might make sense to define the representative register class as the
    912 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
    913 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
    914 // SPR's representative would be DPR_VFP2. This should work well if register
    915 // pressure tracking were modified such that a register use would increment the
    916 // pressure of the register class's representative and all of its superclasses'
    917 // representatives transitively. We have not implemented this because of the
    918 // difficulty, prior to coalescing, of modeling operand register classes due to
    919 // the common occurrence of cross-class copies and subregister insertions and
    920 // extractions.
    921 std::pair<const TargetRegisterClass*, uint8_t>
    922 ARMTargetLowering::findRepresentativeClass(MVT VT) const{
    923   const TargetRegisterClass *RRC = 0;
    924   uint8_t Cost = 1;
    925   switch (VT.SimpleTy) {
    926   default:
    927     return TargetLowering::findRepresentativeClass(VT);
    928   // Use DPR as the representative register class for all floating-point
    929   // and vector types. Since there are 32 SPR registers and 32 DPR registers,
    930   // the cost is 1 for both f32 and f64.
    931   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
    932   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    933     RRC = &ARM::DPRRegClass;
    934     // When NEON is used for SP, only half of the register file is available
    935     // because operations that define both SP and DP results will be constrained
    936     // to the VFP2 class (D0-D15). We currently model this constraint prior to
    937     // coalescing by double-counting the SP regs. See the FIXME above.
    938     if (Subtarget->useNEONForSinglePrecisionFP())
    939       Cost = 2;
    940     break;
    941   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
    942   case MVT::v4f32: case MVT::v2f64:
    943     RRC = &ARM::DPRRegClass;
    944     Cost = 2;
    945     break;
    946   case MVT::v4i64:
    947     RRC = &ARM::DPRRegClass;
    948     Cost = 4;
    949     break;
    950   case MVT::v8i64:
    951     RRC = &ARM::DPRRegClass;
    952     Cost = 8;
    953     break;
    954   }
    955   return std::make_pair(RRC, Cost);
    956 }
    957 
    958 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
    959   switch (Opcode) {
    960   default: return 0;
    961   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
    962   case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
    963   case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
    964   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
    965   case ARMISD::CALL:          return "ARMISD::CALL";
    966   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
    967   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
    968   case ARMISD::tCALL:         return "ARMISD::tCALL";
    969   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
    970   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
    971   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
    972   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
    973   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
    974   case ARMISD::CMP:           return "ARMISD::CMP";
    975   case ARMISD::CMN:           return "ARMISD::CMN";
    976   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
    977   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
    978   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
    979   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
    980   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
    981 
    982   case ARMISD::CMOV:          return "ARMISD::CMOV";
    983 
    984   case ARMISD::RBIT:          return "ARMISD::RBIT";
    985 
    986   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
    987   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
    988   case ARMISD::SITOF:         return "ARMISD::SITOF";
    989   case ARMISD::UITOF:         return "ARMISD::UITOF";
    990 
    991   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
    992   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
    993   case ARMISD::RRX:           return "ARMISD::RRX";
    994 
    995   case ARMISD::ADDC:          return "ARMISD::ADDC";
    996   case ARMISD::ADDE:          return "ARMISD::ADDE";
    997   case ARMISD::SUBC:          return "ARMISD::SUBC";
    998   case ARMISD::SUBE:          return "ARMISD::SUBE";
    999 
   1000   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
   1001   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
   1002 
   1003   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
   1004   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
   1005 
   1006   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
   1007 
   1008   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
   1009 
   1010   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
   1011 
   1012   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
   1013   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
   1014 
   1015   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
   1016 
   1017   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
   1018   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
   1019   case ARMISD::VCGE:          return "ARMISD::VCGE";
   1020   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
   1021   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
   1022   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
   1023   case ARMISD::VCGT:          return "ARMISD::VCGT";
   1024   case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
   1025   case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
   1026   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
   1027   case ARMISD::VTST:          return "ARMISD::VTST";
   1028 
   1029   case ARMISD::VSHL:          return "ARMISD::VSHL";
   1030   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
   1031   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
   1032   case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
   1033   case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
   1034   case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
   1035   case ARMISD::VSHRN:         return "ARMISD::VSHRN";
   1036   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
   1037   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
   1038   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
   1039   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
   1040   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
   1041   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
   1042   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
   1043   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
   1044   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
   1045   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
   1046   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
   1047   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
   1048   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
   1049   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
   1050   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
   1051   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
   1052   case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
   1053   case ARMISD::VDUP:          return "ARMISD::VDUP";
   1054   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
   1055   case ARMISD::VEXT:          return "ARMISD::VEXT";
   1056   case ARMISD::VREV64:        return "ARMISD::VREV64";
   1057   case ARMISD::VREV32:        return "ARMISD::VREV32";
   1058   case ARMISD::VREV16:        return "ARMISD::VREV16";
   1059   case ARMISD::VZIP:          return "ARMISD::VZIP";
   1060   case ARMISD::VUZP:          return "ARMISD::VUZP";
   1061   case ARMISD::VTRN:          return "ARMISD::VTRN";
   1062   case ARMISD::VTBL1:         return "ARMISD::VTBL1";
   1063   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
   1064   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
   1065   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
   1066   case ARMISD::UMLAL:         return "ARMISD::UMLAL";
   1067   case ARMISD::SMLAL:         return "ARMISD::SMLAL";
   1068   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
   1069   case ARMISD::FMAX:          return "ARMISD::FMAX";
   1070   case ARMISD::FMIN:          return "ARMISD::FMIN";
   1071   case ARMISD::BFI:           return "ARMISD::BFI";
   1072   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
   1073   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
   1074   case ARMISD::VBSL:          return "ARMISD::VBSL";
   1075   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
   1076   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
   1077   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
   1078   case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
   1079   case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
   1080   case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
   1081   case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
   1082   case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
   1083   case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
   1084   case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
   1085   case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
   1086   case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
   1087   case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
   1088   case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
   1089   case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
   1090   case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
   1091   case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
   1092   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
   1093   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
   1094   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
   1095 
   1096   case ARMISD::ATOMADD64_DAG:     return "ATOMADD64_DAG";
   1097   case ARMISD::ATOMSUB64_DAG:     return "ATOMSUB64_DAG";
   1098   case ARMISD::ATOMOR64_DAG:      return "ATOMOR64_DAG";
   1099   case ARMISD::ATOMXOR64_DAG:     return "ATOMXOR64_DAG";
   1100   case ARMISD::ATOMAND64_DAG:     return "ATOMAND64_DAG";
   1101   case ARMISD::ATOMNAND64_DAG:    return "ATOMNAND64_DAG";
   1102   case ARMISD::ATOMSWAP64_DAG:    return "ATOMSWAP64_DAG";
   1103   case ARMISD::ATOMCMPXCHG64_DAG: return "ATOMCMPXCHG64_DAG";
   1104   case ARMISD::ATOMMIN64_DAG:     return "ATOMMIN64_DAG";
   1105   case ARMISD::ATOMUMIN64_DAG:    return "ATOMUMIN64_DAG";
   1106   case ARMISD::ATOMMAX64_DAG:     return "ATOMMAX64_DAG";
   1107   case ARMISD::ATOMUMAX64_DAG:    return "ATOMUMAX64_DAG";
   1108   }
   1109 }
   1110 
   1111 EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   1112   if (!VT.isVector()) return getPointerTy();
   1113   return VT.changeVectorElementTypeToInteger();
   1114 }
   1115 
   1116 /// getRegClassFor - Return the register class that should be used for the
   1117 /// specified value type.
   1118 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
   1119   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   1120   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
   1121   // load / store 4 to 8 consecutive D registers.
   1122   if (Subtarget->hasNEON()) {
   1123     if (VT == MVT::v4i64)
   1124       return &ARM::QQPRRegClass;
   1125     if (VT == MVT::v8i64)
   1126       return &ARM::QQQQPRRegClass;
   1127   }
   1128   return TargetLowering::getRegClassFor(VT);
   1129 }
   1130 
   1131 // Create a fast isel object.
   1132 FastISel *
   1133 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
   1134                                   const TargetLibraryInfo *libInfo) const {
   1135   return ARM::createFastISel(funcInfo, libInfo);
   1136 }
   1137 
   1138 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
   1139 /// be used for loads / stores from the global.
   1140 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
   1141   return (Subtarget->isThumb1Only() ? 127 : 4095);
   1142 }
   1143 
   1144 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   1145   unsigned NumVals = N->getNumValues();
   1146   if (!NumVals)
   1147     return Sched::RegPressure;
   1148 
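           // Nodes that produce floating-point or vector values are scheduled for ILP.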
   1149   for (unsigned i = 0; i != NumVals; ++i) {
   1150     EVT VT = N->getValueType(i);
   1151     if (VT == MVT::Glue || VT == MVT::Other)
   1152       continue;
   1153     if (VT.isFloatingPoint() || VT.isVector())
   1154       return Sched::ILP;
   1155   }
   1156 
   1157   if (!N->isMachineOpcode())
   1158     return Sched::RegPressure;
   1159 
    1160   // Loads are scheduled for latency even if the instruction itinerary
    1161   // is not available.
   1162   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   1163   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
   1164 
   1165   if (MCID.getNumDefs() == 0)
   1166     return Sched::RegPressure;
   1167   if (!Itins->isEmpty() &&
   1168       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
   1169     return Sched::ILP;
   1170 
   1171   return Sched::RegPressure;
   1172 }
   1173 
   1174 //===----------------------------------------------------------------------===//
   1175 // Lowering Code
   1176 //===----------------------------------------------------------------------===//
   1177 
   1178 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
   1179 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
   1180   switch (CC) {
   1181   default: llvm_unreachable("Unknown condition code!");
   1182   case ISD::SETNE:  return ARMCC::NE;
   1183   case ISD::SETEQ:  return ARMCC::EQ;
   1184   case ISD::SETGT:  return ARMCC::GT;
   1185   case ISD::SETGE:  return ARMCC::GE;
   1186   case ISD::SETLT:  return ARMCC::LT;
   1187   case ISD::SETLE:  return ARMCC::LE;
   1188   case ISD::SETUGT: return ARMCC::HI;
   1189   case ISD::SETUGE: return ARMCC::HS;
   1190   case ISD::SETULT: return ARMCC::LO;
   1191   case ISD::SETULE: return ARMCC::LS;
   1192   }
   1193 }
   1194 
   1195 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
   1196 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
   1197                         ARMCC::CondCodes &CondCode2) {
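           // CondCode2 stays AL (always) unless the comparison requires checking
           // two ARM condition codes (SETONE and SETUEQ below).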
   1198   CondCode2 = ARMCC::AL;
   1199   switch (CC) {
   1200   default: llvm_unreachable("Unknown FP condition!");
   1201   case ISD::SETEQ:
   1202   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
   1203   case ISD::SETGT:
   1204   case ISD::SETOGT: CondCode = ARMCC::GT; break;
   1205   case ISD::SETGE:
   1206   case ISD::SETOGE: CondCode = ARMCC::GE; break;
   1207   case ISD::SETOLT: CondCode = ARMCC::MI; break;
   1208   case ISD::SETOLE: CondCode = ARMCC::LS; break;
   1209   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
   1210   case ISD::SETO:   CondCode = ARMCC::VC; break;
   1211   case ISD::SETUO:  CondCode = ARMCC::VS; break;
   1212   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
   1213   case ISD::SETUGT: CondCode = ARMCC::HI; break;
   1214   case ISD::SETUGE: CondCode = ARMCC::PL; break;
   1215   case ISD::SETLT:
   1216   case ISD::SETULT: CondCode = ARMCC::LT; break;
   1217   case ISD::SETLE:
   1218   case ISD::SETULE: CondCode = ARMCC::LE; break;
   1219   case ISD::SETNE:
   1220   case ISD::SETUNE: CondCode = ARMCC::NE; break;
   1221   }
   1222 }
   1223 
   1224 //===----------------------------------------------------------------------===//
   1225 //                      Calling Convention Implementation
   1226 //===----------------------------------------------------------------------===//
   1227 
   1228 #include "ARMGenCallingConv.inc"
   1229 
    1230 /// CCAssignFnForNode - Selects the correct CCAssignFn for the
    1231 /// given CallingConvention value.
   1232 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
   1233                                                  bool Return,
   1234                                                  bool isVarArg) const {
   1235   switch (CC) {
   1236   default:
   1237     llvm_unreachable("Unsupported calling convention");
   1238   case CallingConv::Fast:
   1239     if (Subtarget->hasVFP2() && !isVarArg) {
   1240       if (!Subtarget->isAAPCS_ABI())
   1241         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
   1242       // For AAPCS ABI targets, just use VFP variant of the calling convention.
   1243       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1244     }
   1245     // Fallthrough
   1246   case CallingConv::C: {
   1247     // Use target triple & subtarget features to do actual dispatch.
   1248     if (!Subtarget->isAAPCS_ABI())
   1249       return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   1250     else if (Subtarget->hasVFP2() &&
   1251              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
   1252              !isVarArg)
   1253       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1254     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
   1255   }
   1256   case CallingConv::ARM_AAPCS_VFP:
   1257     if (!isVarArg)
   1258       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1259     // Fallthrough
   1260   case CallingConv::ARM_AAPCS:
   1261     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
   1262   case CallingConv::ARM_APCS:
   1263     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   1264   case CallingConv::GHC:
   1265     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
   1266   }
   1267 }
   1268 
   1269 /// LowerCallResult - Lower the result values of a call into the
   1270 /// appropriate copies out of appropriate physical registers.
   1271 SDValue
   1272 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   1273                                    CallingConv::ID CallConv, bool isVarArg,
   1274                                    const SmallVectorImpl<ISD::InputArg> &Ins,
   1275                                    SDLoc dl, SelectionDAG &DAG,
   1276                                    SmallVectorImpl<SDValue> &InVals,
   1277                                    bool isThisReturn, SDValue ThisVal) const {
   1278 
   1279   // Assign locations to each value returned by this call.
   1280   SmallVector<CCValAssign, 16> RVLocs;
   1281   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1282                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
   1283   CCInfo.AnalyzeCallResult(Ins,
   1284                            CCAssignFnForNode(CallConv, /* Return*/ true,
   1285                                              isVarArg));
   1286 
   1287   // Copy all of the result registers out of their specified physreg.
   1288   for (unsigned i = 0; i != RVLocs.size(); ++i) {
   1289     CCValAssign VA = RVLocs[i];
   1290 
    1291     // Pass the 'this' value directly from the argument to the return value,
    1292     // to avoid register unit interference.
   1293     if (i == 0 && isThisReturn) {
   1294       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
   1295              "unexpected return calling convention register assignment");
   1296       InVals.push_back(ThisVal);
   1297       continue;
   1298     }
   1299 
   1300     SDValue Val;
   1301     if (VA.needsCustom()) {
   1302       // Handle f64 or half of a v2f64.
   1303       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
   1304                                       InFlag);
   1305       Chain = Lo.getValue(1);
   1306       InFlag = Lo.getValue(2);
   1307       VA = RVLocs[++i]; // skip ahead to next loc
   1308       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
   1309                                       InFlag);
   1310       Chain = Hi.getValue(1);
   1311       InFlag = Hi.getValue(2);
   1312       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   1313 
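               // For v2f64, a second GPR pair provides the other f64 half; both
               // halves are inserted into a v2f64 vector.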
   1314       if (VA.getLocVT() == MVT::v2f64) {
   1315         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
   1316         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
   1317                           DAG.getConstant(0, MVT::i32));
   1318 
   1319         VA = RVLocs[++i]; // skip ahead to next loc
   1320         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
   1321         Chain = Lo.getValue(1);
   1322         InFlag = Lo.getValue(2);
   1323         VA = RVLocs[++i]; // skip ahead to next loc
   1324         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
   1325         Chain = Hi.getValue(1);
   1326         InFlag = Hi.getValue(2);
   1327         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   1328         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
   1329                           DAG.getConstant(1, MVT::i32));
   1330       }
   1331     } else {
   1332       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
   1333                                InFlag);
   1334       Chain = Val.getValue(1);
   1335       InFlag = Val.getValue(2);
   1336     }
   1337 
   1338     switch (VA.getLocInfo()) {
   1339     default: llvm_unreachable("Unknown loc info!");
   1340     case CCValAssign::Full: break;
   1341     case CCValAssign::BCvt:
   1342       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
   1343       break;
   1344     }
   1345 
   1346     InVals.push_back(Val);
   1347   }
   1348 
   1349   return Chain;
   1350 }
   1351 
   1352 /// LowerMemOpCallTo - Store the argument to the stack.
   1353 SDValue
   1354 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
   1355                                     SDValue StackPtr, SDValue Arg,
   1356                                     SDLoc dl, SelectionDAG &DAG,
   1357                                     const CCValAssign &VA,
   1358                                     ISD::ArgFlagsTy Flags) const {
   1359   unsigned LocMemOffset = VA.getLocMemOffset();
   1360   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   1361   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   1362   return DAG.getStore(Chain, dl, Arg, PtrOff,
   1363                       MachinePointerInfo::getStack(LocMemOffset),
   1364                       false, false, 0);
   1365 }
   1366 
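         /// PassF64ArgInRegs - Split an f64 argument into a pair of i32 values
         /// (via VMOVRRD) and pass them in the locations described by VA and
         /// NextVA: both in registers, or one in a register and one on the stack.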
   1367 void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG,
   1368                                          SDValue Chain, SDValue &Arg,
   1369                                          RegsToPassVector &RegsToPass,
   1370                                          CCValAssign &VA, CCValAssign &NextVA,
   1371                                          SDValue &StackPtr,
   1372                                          SmallVectorImpl<SDValue> &MemOpChains,
   1373                                          ISD::ArgFlagsTy Flags) const {
   1374 
   1375   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
   1376                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
   1377   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
   1378 
   1379   if (NextVA.isRegLoc())
   1380     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
   1381   else {
   1382     assert(NextVA.isMemLoc());
   1383     if (StackPtr.getNode() == 0)
   1384       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
   1385 
   1386     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
   1387                                            dl, DAG, NextVA,
   1388                                            Flags));
   1389   }
   1390 }
   1391 
    1392 /// LowerCall - Lower a call into a callseq_start <-
    1393 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
    1394 /// nodes.
   1395 SDValue
   1396 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   1397                              SmallVectorImpl<SDValue> &InVals) const {
   1398   SelectionDAG &DAG                     = CLI.DAG;
   1399   SDLoc &dl                          = CLI.DL;
   1400   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   1401   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
   1402   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
   1403   SDValue Chain                         = CLI.Chain;
   1404   SDValue Callee                        = CLI.Callee;
   1405   bool &isTailCall                      = CLI.IsTailCall;
   1406   CallingConv::ID CallConv              = CLI.CallConv;
   1407   bool doesNotRet                       = CLI.DoesNotReturn;
   1408   bool isVarArg                         = CLI.IsVarArg;
   1409 
   1410   MachineFunction &MF = DAG.getMachineFunction();
   1411   bool isStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
   1412   bool isThisReturn   = false;
   1413   bool isSibCall      = false;
   1414   // Disable tail calls if they're not supported.
   1415   if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
   1416     isTailCall = false;
   1417   if (isTailCall) {
   1418     // Check if it's really possible to do a tail call.
   1419     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
   1420                     isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
   1421                                                    Outs, OutVals, Ins, DAG);
   1422     // We don't support GuaranteedTailCallOpt for ARM, only automatically
   1423     // detected sibcalls.
   1424     if (isTailCall) {
   1425       ++NumTailCalls;
   1426       isSibCall = true;
   1427     }
   1428   }
   1429 
   1430   // Analyze operands of the call, assigning locations to each operand.
   1431   SmallVector<CCValAssign, 16> ArgLocs;
   1432   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1433                  getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
   1434   CCInfo.AnalyzeCallOperands(Outs,
   1435                              CCAssignFnForNode(CallConv, /* Return*/ false,
   1436                                                isVarArg));
   1437 
   1438   // Get a count of how many bytes are to be pushed on the stack.
   1439   unsigned NumBytes = CCInfo.getNextStackOffset();
   1440 
   1441   // For tail calls, memory operands are available in our caller's stack.
   1442   if (isSibCall)
   1443     NumBytes = 0;
   1444 
   1445   // Adjust the stack pointer for the new arguments...
   1446   // These operations are automatically eliminated by the prolog/epilog pass
   1447   if (!isSibCall)
   1448     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
   1449                                  dl);
   1450 
   1451   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
   1452 
   1453   RegsToPassVector RegsToPass;
   1454   SmallVector<SDValue, 8> MemOpChains;
   1455 
   1456   // Walk the register/memloc assignments, inserting copies/loads.  In the case
   1457   // of tail call optimization, arguments are handled later.
   1458   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
   1459        i != e;
   1460        ++i, ++realArgIdx) {
   1461     CCValAssign &VA = ArgLocs[i];
   1462     SDValue Arg = OutVals[realArgIdx];
   1463     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
   1464     bool isByVal = Flags.isByVal();
   1465 
   1466     // Promote the value if needed.
   1467     switch (VA.getLocInfo()) {
   1468     default: llvm_unreachable("Unknown loc info!");
   1469     case CCValAssign::Full: break;
   1470     case CCValAssign::SExt:
   1471       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
   1472       break;
   1473     case CCValAssign::ZExt:
   1474       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
   1475       break;
   1476     case CCValAssign::AExt:
   1477       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
   1478       break;
   1479     case CCValAssign::BCvt:
   1480       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   1481       break;
   1482     }
   1483 
   1484     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
   1485     if (VA.needsCustom()) {
   1486       if (VA.getLocVT() == MVT::v2f64) {
   1487         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1488                                   DAG.getConstant(0, MVT::i32));
   1489         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1490                                   DAG.getConstant(1, MVT::i32));
   1491 
   1492         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
   1493                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
   1494 
   1495         VA = ArgLocs[++i]; // skip ahead to next loc
   1496         if (VA.isRegLoc()) {
   1497           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
   1498                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
   1499         } else {
   1500           assert(VA.isMemLoc());
   1501 
   1502           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
   1503                                                  dl, DAG, VA, Flags));
   1504         }
   1505       } else {
   1506         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
   1507                          StackPtr, MemOpChains, Flags);
   1508       }
   1509     } else if (VA.isRegLoc()) {
   1510       if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) {
   1511         assert(VA.getLocVT() == MVT::i32 &&
   1512                "unexpected calling convention register assignment");
   1513         assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
   1514                "unexpected use of 'returned'");
   1515         isThisReturn = true;
   1516       }
   1517       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
   1518     } else if (isByVal) {
   1519       assert(VA.isMemLoc());
   1520       unsigned offset = 0;
   1521 
    1522       // Check whether this byval aggregate is split between registers
    1523       // and memory.
   1524       unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
   1525       unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();
   1526 
   1527       if (CurByValIdx < ByValArgsCount) {
   1528 
   1529         unsigned RegBegin, RegEnd;
   1530         CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
   1531 
   1532         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1533         unsigned int i, j;
   1534         for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
   1535           SDValue Const = DAG.getConstant(4*i, MVT::i32);
   1536           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
   1537           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
   1538                                      MachinePointerInfo(),
   1539                                      false, false, false, 0);
   1540           MemOpChains.push_back(Load.getValue(1));
   1541           RegsToPass.push_back(std::make_pair(j, Load));
   1542         }
   1543 
    1544         // If the parameter size exceeds the register area, the "offset" value
    1545         // helps us calculate the stack slot for the remaining part properly.
   1546         offset = RegEnd - RegBegin;
   1547 
   1548         CCInfo.nextInRegsParam();
   1549       }
   1550 
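               // Any remaining part of the byval argument that did not fit in
               // registers is copied to its stack slot with a COPY_STRUCT_BYVAL node.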
   1551       if (Flags.getByValSize() > 4*offset) {
   1552         unsigned LocMemOffset = VA.getLocMemOffset();
   1553         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
   1554         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
   1555                                   StkPtrOff);
   1556         SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
   1557         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
   1558         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
   1559                                            MVT::i32);
   1560         SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
   1561 
   1562         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1563         SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
   1564         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
   1565                                           Ops, array_lengthof(Ops)));
   1566       }
   1567     } else if (!isSibCall) {
   1568       assert(VA.isMemLoc());
   1569 
   1570       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
   1571                                              dl, DAG, VA, Flags));
   1572     }
   1573   }
   1574 
   1575   if (!MemOpChains.empty())
   1576     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   1577                         &MemOpChains[0], MemOpChains.size());
   1578 
   1579   // Build a sequence of copy-to-reg nodes chained together with token chain
   1580   // and flag operands which copy the outgoing args into the appropriate regs.
   1581   SDValue InFlag;
   1582   // Tail call byval lowering might overwrite argument registers so in case of
   1583   // tail call optimization the copies to registers are lowered later.
   1584   if (!isTailCall)
   1585     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   1586       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   1587                                RegsToPass[i].second, InFlag);
   1588       InFlag = Chain.getValue(1);
   1589     }
   1590 
   1591   // For tail calls lower the arguments to the 'real' stack slot.
   1592   if (isTailCall) {
   1593     // Force all the incoming stack arguments to be loaded from the stack
   1594     // before any new outgoing arguments are stored to the stack, because the
   1595     // outgoing stack slots may alias the incoming argument stack slots, and
   1596     // the alias isn't otherwise explicit. This is slightly more conservative
   1597     // than necessary, because it means that each store effectively depends
   1598     // on every argument instead of just those arguments it would clobber.
   1599 
    1600     // Do not glue the preceding CopyToReg nodes together with the following ones.
   1601     InFlag = SDValue();
   1602     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   1603       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   1604                                RegsToPass[i].second, InFlag);
   1605       InFlag = Chain.getValue(1);
   1606     }
   1607     InFlag = SDValue();
   1608   }
   1609 
   1610   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   1611   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   1612   // node so that legalize doesn't hack it.
   1613   bool isDirect = false;
   1614   bool isARMFunc = false;
   1615   bool isLocalARMFunc = false;
   1616   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   1617 
   1618   if (EnableARMLongCalls) {
   1619     assert (getTargetMachine().getRelocationModel() == Reloc::Static
   1620             && "long-calls with non-static relocation model!");
   1621     // Handle a global address or an external symbol. If it's not one of
   1622     // those, the target's already in a register, so we don't need to do
   1623     // anything extra.
   1624     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   1625       const GlobalValue *GV = G->getGlobal();
   1626       // Create a constant pool entry for the callee address
   1627       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1628       ARMConstantPoolValue *CPV =
   1629         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
   1630 
   1631       // Get the address of the callee into a register
   1632       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1633       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1634       Callee = DAG.getLoad(getPointerTy(), dl,
   1635                            DAG.getEntryNode(), CPAddr,
   1636                            MachinePointerInfo::getConstantPool(),
   1637                            false, false, false, 0);
   1638     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
   1639       const char *Sym = S->getSymbol();
   1640 
   1641       // Create a constant pool entry for the callee address
   1642       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1643       ARMConstantPoolValue *CPV =
   1644         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
   1645                                       ARMPCLabelIndex, 0);
   1646       // Get the address of the callee into a register
   1647       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1648       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1649       Callee = DAG.getLoad(getPointerTy(), dl,
   1650                            DAG.getEntryNode(), CPAddr,
   1651                            MachinePointerInfo::getConstantPool(),
   1652                            false, false, false, 0);
   1653     }
   1654   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   1655     const GlobalValue *GV = G->getGlobal();
   1656     isDirect = true;
   1657     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
   1658     bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
   1659                    getTargetMachine().getRelocationModel() != Reloc::Static;
   1660     isARMFunc = !Subtarget->isThumb() || isStub;
   1661     // ARM call to a local ARM function is predicable.
   1662     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
   1663     // tBX takes a register source operand.
   1664     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
   1665       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1666       ARMConstantPoolValue *CPV =
   1667         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
   1668       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1669       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1670       Callee = DAG.getLoad(getPointerTy(), dl,
   1671                            DAG.getEntryNode(), CPAddr,
   1672                            MachinePointerInfo::getConstantPool(),
   1673                            false, false, false, 0);
   1674       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   1675       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
   1676                            getPointerTy(), Callee, PICLabel);
   1677     } else {
   1678       // On ELF targets for PIC code, direct calls should go through the PLT
   1679       unsigned OpFlags = 0;
   1680       if (Subtarget->isTargetELF() &&
   1681           getTargetMachine().getRelocationModel() == Reloc::PIC_)
   1682         OpFlags = ARMII::MO_PLT;
   1683       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
   1684     }
   1685   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
   1686     isDirect = true;
   1687     bool isStub = Subtarget->isTargetDarwin() &&
   1688                   getTargetMachine().getRelocationModel() != Reloc::Static;
   1689     isARMFunc = !Subtarget->isThumb() || isStub;
   1690     // tBX takes a register source operand.
   1691     const char *Sym = S->getSymbol();
   1692     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
   1693       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1694       ARMConstantPoolValue *CPV =
   1695         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
   1696                                       ARMPCLabelIndex, 4);
   1697       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1698       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1699       Callee = DAG.getLoad(getPointerTy(), dl,
   1700                            DAG.getEntryNode(), CPAddr,
   1701                            MachinePointerInfo::getConstantPool(),
   1702                            false, false, false, 0);
   1703       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   1704       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
   1705                            getPointerTy(), Callee, PICLabel);
   1706     } else {
   1707       unsigned OpFlags = 0;
   1708       // On ELF targets for PIC code, direct calls should go through the PLT
   1709       if (Subtarget->isTargetELF() &&
   1710                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
   1711         OpFlags = ARMII::MO_PLT;
   1712       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
   1713     }
   1714   }
   1715 
   1716   // FIXME: handle tail calls differently.
   1717   unsigned CallOpc;
   1718   bool HasMinSizeAttr = MF.getFunction()->getAttributes().
   1719     hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
   1720   if (Subtarget->isThumb()) {
   1721     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
   1722       CallOpc = ARMISD::CALL_NOLINK;
   1723     else
   1724       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
   1725   } else {
   1726     if (!isDirect && !Subtarget->hasV5TOps())
   1727       CallOpc = ARMISD::CALL_NOLINK;
   1728     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
   1729                // Emit regular call when code size is the priority
   1730                !HasMinSizeAttr)
    1731       // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
   1732       CallOpc = ARMISD::CALL_NOLINK;
   1733     else
   1734       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
   1735   }
   1736 
   1737   std::vector<SDValue> Ops;
   1738   Ops.push_back(Chain);
   1739   Ops.push_back(Callee);
   1740 
   1741   // Add argument registers to the end of the list so that they are known live
   1742   // into the call.
   1743   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
   1744     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
   1745                                   RegsToPass[i].second.getValueType()));
   1746 
   1747   // Add a register mask operand representing the call-preserved registers.
   1748   const uint32_t *Mask;
   1749   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
   1750   const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI);
   1751   if (isThisReturn) {
   1752     // For 'this' returns, use the R0-preserving mask if applicable
   1753     Mask = ARI->getThisReturnPreservedMask(CallConv);
   1754     if (!Mask) {
   1755       // Set isThisReturn to false if the calling convention is not one that
   1756       // allows 'returned' to be modeled in this way, so LowerCallResult does
   1757       // not try to pass 'this' straight through
   1758       isThisReturn = false;
   1759       Mask = ARI->getCallPreservedMask(CallConv);
   1760     }
   1761   } else
   1762     Mask = ARI->getCallPreservedMask(CallConv);
   1763 
   1764   assert(Mask && "Missing call preserved mask for calling convention");
   1765   Ops.push_back(DAG.getRegisterMask(Mask));
   1766 
   1767   if (InFlag.getNode())
   1768     Ops.push_back(InFlag);
   1769 
   1770   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   1771   if (isTailCall)
   1772     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
   1773 
   1774   // Returns a chain and a flag for retval copy to use.
   1775   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
   1776   InFlag = Chain.getValue(1);
   1777 
   1778   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
   1779                              DAG.getIntPtrConstant(0, true), InFlag, dl);
   1780   if (!Ins.empty())
   1781     InFlag = Chain.getValue(1);
   1782 
   1783   // Handle result values, copying them out of physregs into vregs that we
   1784   // return.
   1785   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
   1786                          InVals, isThisReturn,
   1787                          isThisReturn ? OutVals[0] : SDValue());
   1788 }
   1789 
   1790 /// HandleByVal - Every parameter *after* a byval parameter is passed
   1791 /// on the stack.  Remember the next parameter register to allocate,
    1792 /// and then confiscate the rest of the parameter registers to ensure
    1793 /// this.
   1794 void
   1795 ARMTargetLowering::HandleByVal(
   1796     CCState *State, unsigned &size, unsigned Align) const {
   1797   unsigned reg = State->AllocateReg(GPRArgRegs, 4);
   1798   assert((State->getCallOrPrologue() == Prologue ||
   1799           State->getCallOrPrologue() == Call) &&
   1800          "unhandled ParmContext");
   1801 
    1802   // For in-prologue parameter handling, we also introduce a stack offset
    1803   // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal.
    1804   // This behaviour falls outside the AAPCS rules (5.5 Parameter Passing) for how
    1805   // the NSAA should be evaluated (NSAA means "next stacked argument address").
   1806   // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs.
   1807   // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
   1808   unsigned NSAAOffset = State->getNextStackOffset();
   1809   if (State->getCallOrPrologue() != Call) {
   1810     for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) {
   1811       unsigned RB, RE;
   1812       State->getInRegsParamInfo(i, RB, RE);
   1813       assert(NSAAOffset >= (RE-RB)*4 &&
    1814              "Stack offset for byval regs is missing?");
   1815       NSAAOffset -= (RE-RB)*4;
   1816     }
   1817   }
   1818   if ((ARM::R0 <= reg) && (reg <= ARM::R3)) {
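             // For over-aligned byval arguments under AAPCS, skip (waste) argument
             // registers until the first byval register meets the requested alignment.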
   1819     if (Subtarget->isAAPCS_ABI() && Align > 4) {
   1820       unsigned AlignInRegs = Align / 4;
   1821       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
   1822       for (unsigned i = 0; i < Waste; ++i)
   1823         reg = State->AllocateReg(GPRArgRegs, 4);
   1824     }
   1825     if (reg != 0) {
   1826       unsigned excess = 4 * (ARM::R4 - reg);
   1827 
    1828       // Special case when NSAA != SP and the parameter size is greater than the
    1829       // size of all remaining GPR regs. In that case we can't split the parameter;
    1830       // we must send it to the stack. We must also set the NCRN to R4, so waste
    1831       // all remaining registers.
   1832       if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) {
   1833         while (State->AllocateReg(GPRArgRegs, 4))
   1834           ;
   1835         return;
   1836       }
   1837 
    1838       // The first register for the byval parameter is the first register that
    1839       // wasn't allocated before this method call, so it would be "reg".
    1840       // If the parameter is small enough to fit in the range [reg, r4), then
    1841       // the end (one past the last) register would be reg + param-size-in-regs;
    1842       // otherwise the parameter is split between registers and stack, and the
    1843       // end register is r4 in that case.
   1844       unsigned ByValRegBegin = reg;
   1845       unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4;
   1846       State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
    1847       // Note: the first register was already allocated at the beginning of this
    1848       // function; allocate the remaining registers we need.
   1849       for (unsigned i = reg+1; i != ByValRegEnd; ++i)
   1850         State->AllocateReg(GPRArgRegs, 4);
   1851       // At a call site, a byval parameter that is split between
   1852       // registers and memory needs its size truncated here.  In a
   1853       // function prologue, such byval parameters are reassembled in
   1854       // memory, and are not truncated.
   1855       if (State->getCallOrPrologue() == Call) {
    1856         // Set the remaining size to 0 when the whole structure
    1857         // can be stored in registers.
   1858         if (size < excess)
   1859           size = 0;
   1860         else
   1861           size -= excess;
   1862       }
   1863     }
   1864   }
   1865 }
   1866 
    1867 /// MatchingStackOffset - Return true if the given stack call argument is
    1868 /// already available in the same relative position in the caller's
    1869 /// incoming argument stack.
   1870 static
   1871 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   1872                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
   1873                          const TargetInstrInfo *TII) {
   1874   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
   1875   int FI = INT_MAX;
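           // The argument can reach us either as a CopyFromReg of a virtual register
           // defined by a load from a stack slot, or directly as a load from a frame
           // index; anything else cannot match a fixed incoming argument slot.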
   1876   if (Arg.getOpcode() == ISD::CopyFromReg) {
   1877     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
   1878     if (!TargetRegisterInfo::isVirtualRegister(VR))
   1879       return false;
   1880     MachineInstr *Def = MRI->getVRegDef(VR);
   1881     if (!Def)
   1882       return false;
   1883     if (!Flags.isByVal()) {
   1884       if (!TII->isLoadFromStackSlot(Def, FI))
   1885         return false;
   1886     } else {
   1887       return false;
   1888     }
   1889   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
   1890     if (Flags.isByVal())
   1891       // ByVal argument is passed in as a pointer but it's now being
   1892       // dereferenced. e.g.
   1893       // define @foo(%struct.X* %A) {
   1894       //   tail call @bar(%struct.X* byval %A)
   1895       // }
   1896       return false;
   1897     SDValue Ptr = Ld->getBasePtr();
   1898     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
   1899     if (!FINode)
   1900       return false;
   1901     FI = FINode->getIndex();
   1902   } else
   1903     return false;
   1904 
   1905   assert(FI != INT_MAX);
   1906   if (!MFI->isFixedObjectIndex(FI))
   1907     return false;
   1908   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
   1909 }
   1910 
   1911 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
   1912 /// for tail call optimization. Targets which want to do tail call
   1913 /// optimization should implement this function.
   1914 bool
   1915 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   1916                                                      CallingConv::ID CalleeCC,
   1917                                                      bool isVarArg,
   1918                                                      bool isCalleeStructRet,
   1919                                                      bool isCallerStructRet,
   1920                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
   1921                                     const SmallVectorImpl<SDValue> &OutVals,
   1922                                     const SmallVectorImpl<ISD::InputArg> &Ins,
   1923                                                      SelectionDAG& DAG) const {
   1924   const Function *CallerF = DAG.getMachineFunction().getFunction();
   1925   CallingConv::ID CallerCC = CallerF->getCallingConv();
   1926   bool CCMatch = CallerCC == CalleeCC;
   1927 
   1928   // Look for obvious safe cases to perform tail call optimization that do not
   1929   // require ABI changes. This is what gcc calls sibcall.
   1930 
   1931   // Do not sibcall optimize vararg calls unless the call site is not passing
   1932   // any arguments.
   1933   if (isVarArg && !Outs.empty())
   1934     return false;
   1935 
   1936   // Also avoid sibcall optimization if either caller or callee uses struct
   1937   // return semantics.
   1938   if (isCalleeStructRet || isCallerStructRet)
   1939     return false;
   1940 
   1941   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
   1942   // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
   1943   // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
   1944   // support in the assembler and linker to be used. This would need to be
   1945   // fixed to fully support tail calls in Thumb1.
   1946   //
   1947   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
    1948   // LR.  This means if we need to reload LR, it takes an extra instruction,
   1949   // which outweighs the value of the tail call; but here we don't know yet
   1950   // whether LR is going to be used.  Probably the right approach is to
   1951   // generate the tail call here and turn it back into CALL/RET in
   1952   // emitEpilogue if LR is used.
   1953 
   1954   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
   1955   // but we need to make sure there are enough registers; the only valid
   1956   // registers are the 4 used for parameters.  We don't currently do this
   1957   // case.
   1958   if (Subtarget->isThumb1Only())
   1959     return false;
   1960 
   1961   // If the calling conventions do not match, then we'd better make sure the
   1962   // results are returned in the same way as what the caller expects.
   1963   if (!CCMatch) {
   1964     SmallVector<CCValAssign, 16> RVLocs1;
   1965     ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
   1966                        getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
   1967     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
   1968 
   1969     SmallVector<CCValAssign, 16> RVLocs2;
   1970     ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
   1971                        getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
   1972     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
   1973 
   1974     if (RVLocs1.size() != RVLocs2.size())
   1975       return false;
   1976     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
   1977       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
   1978         return false;
   1979       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
   1980         return false;
   1981       if (RVLocs1[i].isRegLoc()) {
   1982         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
   1983           return false;
   1984       } else {
   1985         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
   1986           return false;
   1987       }
   1988     }
   1989   }
   1990 
    1991   // If the caller's vararg or byval argument has been split between registers
    1992   // and stack, do not perform a tail call, since part of the argument is in the
    1993   // caller's local frame.
   1994   const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
   1995                                       getInfo<ARMFunctionInfo>();
   1996   if (AFI_Caller->getArgRegsSaveSize())
   1997     return false;
   1998 
   1999   // If the callee takes no arguments then go on to check the results of the
   2000   // call.
   2001   if (!Outs.empty()) {
   2002     // Check if stack adjustment is needed. For now, do not do this if any
   2003     // argument is passed on the stack.
   2004     SmallVector<CCValAssign, 16> ArgLocs;
   2005     ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
   2006                       getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
   2007     CCInfo.AnalyzeCallOperands(Outs,
   2008                                CCAssignFnForNode(CalleeCC, false, isVarArg));
   2009     if (CCInfo.getNextStackOffset()) {
   2010       MachineFunction &MF = DAG.getMachineFunction();
   2011 
   2012       // Check if the arguments are already laid out in the right way as
   2013       // the caller's fixed stack objects.
   2014       MachineFrameInfo *MFI = MF.getFrameInfo();
   2015       const MachineRegisterInfo *MRI = &MF.getRegInfo();
   2016       const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   2017       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
   2018            i != e;
   2019            ++i, ++realArgIdx) {
   2020         CCValAssign &VA = ArgLocs[i];
   2021         EVT RegVT = VA.getLocVT();
   2022         SDValue Arg = OutVals[realArgIdx];
   2023         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
   2024         if (VA.getLocInfo() == CCValAssign::Indirect)
   2025           return false;
   2026         if (VA.needsCustom()) {
   2027           // f64 and vector types are split into multiple registers or
   2028           // register/stack-slot combinations.  The types will not match
   2029           // the registers; give up on memory f64 refs until we figure
   2030           // out what to do about this.
   2031           if (!VA.isRegLoc())
   2032             return false;
   2033           if (!ArgLocs[++i].isRegLoc())
   2034             return false;
   2035           if (RegVT == MVT::v2f64) {
   2036             if (!ArgLocs[++i].isRegLoc())
   2037               return false;
   2038             if (!ArgLocs[++i].isRegLoc())
   2039               return false;
   2040           }
   2041         } else if (!VA.isRegLoc()) {
   2042           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
   2043                                    MFI, MRI, TII))
   2044             return false;
   2045         }
   2046       }
   2047     }
   2048   }
   2049 
   2050   return true;
   2051 }
   2052 
   2053 bool
   2054 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
   2055                                   MachineFunction &MF, bool isVarArg,
   2056                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
   2057                                   LLVMContext &Context) const {
   2058   SmallVector<CCValAssign, 16> RVLocs;
   2059   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
   2060   return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
   2061                                                     isVarArg));
   2062 }
   2063 
   2064 SDValue
   2065 ARMTargetLowering::LowerReturn(SDValue Chain,
   2066                                CallingConv::ID CallConv, bool isVarArg,
   2067                                const SmallVectorImpl<ISD::OutputArg> &Outs,
   2068                                const SmallVectorImpl<SDValue> &OutVals,
   2069                                SDLoc dl, SelectionDAG &DAG) const {
   2070 
   2071   // CCValAssign - represent the assignment of the return value to a location.
   2072   SmallVector<CCValAssign, 16> RVLocs;
   2073 
   2074   // CCState - Info about the registers and stack slots.
   2075   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2076                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
   2077 
   2078   // Analyze outgoing return values.
   2079   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
   2080                                                isVarArg));
   2081 
   2082   SDValue Flag;
   2083   SmallVector<SDValue, 4> RetOps;
   2084   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
   2085 
   2086   // Copy the result values into the output registers.
   2087   for (unsigned i = 0, realRVLocIdx = 0;
   2088        i != RVLocs.size();
   2089        ++i, ++realRVLocIdx) {
   2090     CCValAssign &VA = RVLocs[i];
   2091     assert(VA.isRegLoc() && "Can only return in registers!");
   2092 
   2093     SDValue Arg = OutVals[realRVLocIdx];
   2094 
   2095     switch (VA.getLocInfo()) {
   2096     default: llvm_unreachable("Unknown loc info!");
   2097     case CCValAssign::Full: break;
   2098     case CCValAssign::BCvt:
   2099       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   2100       break;
   2101     }
   2102 
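             // f64 (and each half of v2f64) is returned as a pair of i32 values:
             // VMOVRRD splits it and each i32 half is copied to its own GPR.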
   2103     if (VA.needsCustom()) {
   2104       if (VA.getLocVT() == MVT::v2f64) {
   2105         // Extract the first half and return it in two registers.
   2106         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   2107                                    DAG.getConstant(0, MVT::i32));
   2108         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
   2109                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
   2110 
   2111         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
   2112         Flag = Chain.getValue(1);
   2113         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2114         VA = RVLocs[++i]; // skip ahead to next loc
   2115         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
   2116                                  HalfGPRs.getValue(1), Flag);
   2117         Flag = Chain.getValue(1);
   2118         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2119         VA = RVLocs[++i]; // skip ahead to next loc
   2120 
   2121         // Extract the 2nd half and fall through to handle it as an f64 value.
   2122         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   2123                           DAG.getConstant(1, MVT::i32));
   2124       }
   2125       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
   2126       // available.
   2127       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
   2128                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
   2129       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
   2130       Flag = Chain.getValue(1);
   2131       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2132       VA = RVLocs[++i]; // skip ahead to next loc
   2133       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
   2134                                Flag);
   2135     } else
   2136       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
   2137 
    2138     // Guarantee that all emitted copies are stuck together,
    2139     // so that no other code can be scheduled between them.
   2140     Flag = Chain.getValue(1);
   2141     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2142   }
   2143 
   2144   // Update chain and glue.
   2145   RetOps[0] = Chain;
   2146   if (Flag.getNode())
   2147     RetOps.push_back(Flag);
   2148 
   2149   return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
   2150                      RetOps.data(), RetOps.size());
   2151 }
   2152 
   2153 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
   2154   if (N->getNumValues() != 1)
   2155     return false;
   2156   if (!N->hasNUsesOfValue(1, 0))
   2157     return false;
   2158 
   2159   SDValue TCChain = Chain;
   2160   SDNode *Copy = *N->use_begin();
   2161   if (Copy->getOpcode() == ISD::CopyToReg) {
   2162     // If the copy has a glue operand, we conservatively assume it isn't safe to
   2163     // perform a tail call.
   2164     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
   2165       return false;
   2166     TCChain = Copy->getOperand(0);
   2167   } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
   2168     SDNode *VMov = Copy;
   2169     // f64 returned in a pair of GPRs.
   2170     SmallPtrSet<SDNode*, 2> Copies;
   2171     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
   2172          UI != UE; ++UI) {
   2173       if (UI->getOpcode() != ISD::CopyToReg)
   2174         return false;
   2175       Copies.insert(*UI);
   2176     }
   2177     if (Copies.size() > 2)
   2178       return false;
   2179 
   2180     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
   2181          UI != UE; ++UI) {
   2182       SDValue UseChain = UI->getOperand(0);
   2183       if (Copies.count(UseChain.getNode()))
   2184         // Second CopyToReg
   2185         Copy = *UI;
   2186       else
   2187         // First CopyToReg
   2188         TCChain = UseChain;
   2189     }
   2190   } else if (Copy->getOpcode() == ISD::BITCAST) {
   2191     // f32 returned in a single GPR.
   2192     if (!Copy->hasOneUse())
   2193       return false;
   2194     Copy = *Copy->use_begin();
   2195     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
   2196       return false;
   2197     TCChain = Copy->getOperand(0);
   2198   } else {
   2199     return false;
   2200   }
   2201 
   2202   bool HasRet = false;
   2203   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
   2204        UI != UE; ++UI) {
   2205     if (UI->getOpcode() != ARMISD::RET_FLAG)
   2206       return false;
   2207     HasRet = true;
   2208   }
   2209 
   2210   if (!HasRet)
   2211     return false;
   2212 
   2213   Chain = TCChain;
   2214   return true;
   2215 }
   2216 
   2217 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
   2218   if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
   2219     return false;
   2220 
   2221   if (!CI->isTailCall())
   2222     return false;
   2223 
   2224   return !Subtarget->isThumb1Only();
   2225 }
   2226 
   2227 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
   2228 // their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is
   2229 // one of the above-mentioned nodes. It has to be wrapped because otherwise
   2230 // Select(N) would return N. So the raw TargetGlobalAddress nodes, etc. can
   2231 // only be used to form addressing modes. These wrapped nodes will be selected
   2232 // into MOVi.
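        // For illustration only (node names as used in this file): a constant-pool
        // reference is kept as
        //   (ARMISD::Wrapper (TargetConstantPool <cp#N>))
        // so that instruction selection sees the Wrapper node rather than the bare
        // target node, which Select() would otherwise return unchanged.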
   2233 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
   2234   EVT PtrVT = Op.getValueType();
   2235   // FIXME there is no actual debug info here
   2236   SDLoc dl(Op);
   2237   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
   2238   SDValue Res;
   2239   if (CP->isMachineConstantPoolEntry())
   2240     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
   2241                                     CP->getAlignment());
   2242   else
   2243     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
   2244                                     CP->getAlignment());
   2245   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
   2246 }
   2247 
   2248 unsigned ARMTargetLowering::getJumpTableEncoding() const {
   2249   return MachineJumpTableInfo::EK_Inline;
   2250 }
   2251 
   2252 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   2253                                              SelectionDAG &DAG) const {
   2254   MachineFunction &MF = DAG.getMachineFunction();
   2255   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2256   unsigned ARMPCLabelIndex = 0;
   2257   SDLoc DL(Op);
   2258   EVT PtrVT = getPointerTy();
   2259   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   2260   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2261   SDValue CPAddr;
   2262   if (RelocM == Reloc::Static) {
   2263     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
   2264   } else {
   2265     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
   2266     ARMPCLabelIndex = AFI->createPICLabelUId();
   2267     ARMConstantPoolValue *CPV =
   2268       ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
   2269                                       ARMCP::CPBlockAddress, PCAdj);
   2270     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2271   }
   2272   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
   2273   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
   2274                                MachinePointerInfo::getConstantPool(),
   2275                                false, false, false, 0);
   2276   if (RelocM == Reloc::Static)
   2277     return Result;
   2278   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2279   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
   2280 }
   2281 
   2282 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
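        // A rough sketch of the sequence this lowering aims to produce (illustrative
        // only; the exact labels and pc adjustments are filled in below):
        //   ldr r0, .LCPIn          @ CP entry: var(tlsgd) + pc-relative adjustment
        //   add r0, pc, r0          @ ARMISD::PIC_ADD
        //   bl  __tls_get_addr      @ returns the variable's address in r0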
   2283 SDValue
   2284 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   2285                                                  SelectionDAG &DAG) const {
   2286   SDLoc dl(GA);
   2287   EVT PtrVT = getPointerTy();
   2288   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   2289   MachineFunction &MF = DAG.getMachineFunction();
   2290   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2291   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2292   ARMConstantPoolValue *CPV =
   2293     ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
   2294                                     ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
   2295   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2296   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
   2297   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
   2298                          MachinePointerInfo::getConstantPool(),
   2299                          false, false, false, 0);
   2300   SDValue Chain = Argument.getValue(1);
   2301 
   2302   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2303   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
   2304 
   2305   // call __tls_get_addr.
   2306   ArgListTy Args;
   2307   ArgListEntry Entry;
   2308   Entry.Node = Argument;
   2309   Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
   2310   Args.push_back(Entry);
   2311   // FIXME: is there useful debug info available here?
   2312   TargetLowering::CallLoweringInfo CLI(Chain,
   2313                 (Type *) Type::getInt32Ty(*DAG.getContext()),
   2314                 false, false, false, false,
   2315                 0, CallingConv::C, /*isTailCall=*/false,
   2316                 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
   2317                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
   2318   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
   2319   return CallResult.first;
   2320 }
   2321 
   2322 // Lower ISD::GlobalTLSAddress using the "initial exec" or
   2323 // "local exec" model.
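        // Rough shape (illustrative only): both models compute
        //   address = THREAD_POINTER + offset
        // where initial-exec loads the offset indirectly through a GOT-style slot
        // (two loads below) and local-exec takes it straight from the constant pool
        // (one load below).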
   2324 SDValue
   2325 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
   2326                                         SelectionDAG &DAG,
   2327                                         TLSModel::Model model) const {
   2328   const GlobalValue *GV = GA->getGlobal();
   2329   SDLoc dl(GA);
   2330   SDValue Offset;
   2331   SDValue Chain = DAG.getEntryNode();
   2332   EVT PtrVT = getPointerTy();
   2333   // Get the Thread Pointer
   2334   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
   2335 
   2336   if (model == TLSModel::InitialExec) {
   2337     MachineFunction &MF = DAG.getMachineFunction();
   2338     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2339     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2340     // Initial exec model.
   2341     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   2342     ARMConstantPoolValue *CPV =
   2343       ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
   2344                                       ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
   2345                                       true);
   2346     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2347     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
   2348     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2349                          MachinePointerInfo::getConstantPool(),
   2350                          false, false, false, 0);
   2351     Chain = Offset.getValue(1);
   2352 
   2353     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2354     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
   2355 
   2356     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2357                          MachinePointerInfo::getConstantPool(),
   2358                          false, false, false, 0);
   2359   } else {
   2360     // local exec model
   2361     assert(model == TLSModel::LocalExec);
   2362     ARMConstantPoolValue *CPV =
   2363       ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
   2364     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2365     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
   2366     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2367                          MachinePointerInfo::getConstantPool(),
   2368                          false, false, false, 0);
   2369   }
   2370 
   2371   // The address of the thread-local variable is the sum of the thread
   2372   // pointer and the offset of the variable.
   2373   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
   2374 }
   2375 
   2376 SDValue
   2377 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
   2378   // TODO: implement the "local dynamic" model
   2379   assert(Subtarget->isTargetELF() &&
   2380          "TLS not implemented for non-ELF targets");
   2381   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   2382 
   2383   TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
   2384 
   2385   switch (model) {
   2386     case TLSModel::GeneralDynamic:
   2387     case TLSModel::LocalDynamic:
   2388       return LowerToTLSGeneralDynamicModel(GA, DAG);
   2389     case TLSModel::InitialExec:
   2390     case TLSModel::LocalExec:
   2391       return LowerToTLSExecModels(GA, DAG, model);
   2392   }
   2393   llvm_unreachable("bogus TLS model");
   2394 }
   2395 
   2396 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   2397                                                  SelectionDAG &DAG) const {
   2398   EVT PtrVT = getPointerTy();
   2399   SDLoc dl(Op);
   2400   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   2401   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
   2402     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
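            // GOTOFF addressing yields the symbol's offset from the GOT base, which
            // can be added to the GOT address directly for locally-binding symbols;
            // plain GOT addressing yields the address of a GOT slot, so the extra
            // load below is required to reach the symbol itself.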
   2403     ARMConstantPoolValue *CPV =
   2404       ARMConstantPoolConstant::Create(GV,
   2405                                       UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
   2406     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2407     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2408     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
   2409                                  CPAddr,
   2410                                  MachinePointerInfo::getConstantPool(),
   2411                                  false, false, false, 0);
   2412     SDValue Chain = Result.getValue(1);
   2413     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
   2414     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
   2415     if (!UseGOTOFF)
   2416       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
   2417                            MachinePointerInfo::getGOT(),
   2418                            false, false, false, 0);
   2419     return Result;
   2420   }
   2421 
   2422   // If we have T2 ops, we can materialize the address directly via movt/movw
   2423   // pair. This is always cheaper.
   2424   if (Subtarget->useMovt()) {
   2425     ++NumMovwMovt;
   2426     // FIXME: Once remat is capable of dealing with instructions with register
   2427     // operands, expand this into two nodes.
   2428     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
   2429                        DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   2430   } else {
   2431     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
   2432     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2433     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2434                        MachinePointerInfo::getConstantPool(),
   2435                        false, false, false, 0);
   2436   }
   2437 }
   2438 
   2439 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   2440                                                     SelectionDAG &DAG) const {
   2441   EVT PtrVT = getPointerTy();
   2442   SDLoc dl(Op);
   2443   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   2444   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2445 
   2446   // FIXME: Enable this for static codegen when tool issues are fixed.  Also
   2447   // update ARMFastISel::ARMMaterializeGV.
   2448   if (Subtarget->useMovt() && RelocM != Reloc::Static) {
   2449     ++NumMovwMovt;
   2450     // FIXME: Once remat is capable of dealing with instructions with register
   2451     // operands, expand this into two nodes.
   2452     if (RelocM == Reloc::Static)
   2453       return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
   2454                                  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   2455 
   2456     unsigned Wrapper = (RelocM == Reloc::PIC_)
   2457       ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
   2458     SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
   2459                                  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
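            // Indirect (non-lazy-pointer) symbols on Darwin keep the real address in
            // a pointer slot, so an extra load is needed to reach the global itself.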
   2460     if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
   2461       Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
   2462                            MachinePointerInfo::getGOT(),
   2463                            false, false, false, 0);
   2464     return Result;
   2465   }
   2466 
   2467   unsigned ARMPCLabelIndex = 0;
   2468   SDValue CPAddr;
   2469   if (RelocM == Reloc::Static) {
   2470     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
   2471   } else {
   2472     ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
   2473     ARMPCLabelIndex = AFI->createPICLabelUId();
   2474     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
   2475     ARMConstantPoolValue *CPV =
   2476       ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
   2477                                       PCAdj);
   2478     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2479   }
   2480   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2481 
   2482   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2483                                MachinePointerInfo::getConstantPool(),
   2484                                false, false, false, 0);
   2485   SDValue Chain = Result.getValue(1);
   2486 
   2487   if (RelocM == Reloc::PIC_) {
   2488     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2489     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2490   }
   2491 
   2492   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
   2493     Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
   2494                          false, false, false, 0);
   2495 
   2496   return Result;
   2497 }
   2498 
   2499 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   2500                                                     SelectionDAG &DAG) const {
   2501   assert(Subtarget->isTargetELF() &&
   2502          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
   2503   MachineFunction &MF = DAG.getMachineFunction();
   2504   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2505   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2506   EVT PtrVT = getPointerTy();
   2507   SDLoc dl(Op);
   2508   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
   2509   ARMConstantPoolValue *CPV =
   2510     ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
   2511                                   ARMPCLabelIndex, PCAdj);
   2512   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2513   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2514   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2515                                MachinePointerInfo::getConstantPool(),
   2516                                false, false, false, 0);
   2517   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2518   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2519 }
   2520 
   2521 SDValue
   2522 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
   2523   SDLoc dl(Op);
   2524   SDValue Val = DAG.getConstant(0, MVT::i32);
   2525   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
   2526                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
   2527                      Op.getOperand(1), Val);
   2528 }
   2529 
   2530 SDValue
   2531 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
   2532   SDLoc dl(Op);
   2533   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
   2534                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
   2535 }
   2536 
   2537 SDValue
   2538 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   2539                                           const ARMSubtarget *Subtarget) const {
   2540   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   2541   SDLoc dl(Op);
   2542   switch (IntNo) {
   2543   default: return SDValue();    // Don't custom lower most intrinsics.
   2544   case Intrinsic::arm_thread_pointer: {
   2545     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2546     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
   2547   }
   2548   case Intrinsic::eh_sjlj_lsda: {
   2549     MachineFunction &MF = DAG.getMachineFunction();
   2550     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2551     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2552     EVT PtrVT = getPointerTy();
   2553     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2554     SDValue CPAddr;
   2555     unsigned PCAdj = (RelocM != Reloc::PIC_)
   2556       ? 0 : (Subtarget->isThumb() ? 4 : 8);
   2557     ARMConstantPoolValue *CPV =
   2558       ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
   2559                                       ARMCP::CPLSDA, PCAdj);
   2560     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2561     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2562     SDValue Result =
   2563       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2564                   MachinePointerInfo::getConstantPool(),
   2565                   false, false, false, 0);
   2566 
   2567     if (RelocM == Reloc::PIC_) {
   2568       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2569       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2570     }
   2571     return Result;
   2572   }
   2573   case Intrinsic::arm_neon_vmulls:
   2574   case Intrinsic::arm_neon_vmullu: {
   2575     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
   2576       ? ARMISD::VMULLs : ARMISD::VMULLu;
   2577     return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
   2578                        Op.getOperand(1), Op.getOperand(2));
   2579   }
   2580   }
   2581 }
   2582 
   2583 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   2584                                  const ARMSubtarget *Subtarget) {
   2585   // FIXME: handle "fence singlethread" more efficiently.
   2586   SDLoc dl(Op);
   2587   if (!Subtarget->hasDataBarrier()) {
   2588     // Some ARMv6 CPUs can support data barriers with an mcr instruction.
   2589     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
   2590     // here.
   2591     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
   2592            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
   2593     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
   2594                        DAG.getConstant(0, MVT::i32));
   2595   }
   2596 
   2597   ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
   2598   AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
   2599   unsigned Domain = ARM_MB::ISH;
   2600   if (Subtarget->isSwift() && Ord == Release) {
   2601     // Swift happens to implement ISHST barriers in a way that's compatible with
   2602     // Release semantics but weaker than ISH so we'd be fools not to use
   2603     // it. Beware: other processors probably don't!
   2604     Domain = ARM_MB::ISHST;
   2605   }
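          // Illustrative outcome: the barrier node below is normally selected to
          // "dmb ish" (or "dmb ishst" when the Swift special case above applies).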
   2606 
   2607   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
   2608                      DAG.getConstant(Domain, MVT::i32));
   2609 }
   2610 
   2611 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
   2612                              const ARMSubtarget *Subtarget) {
   2613   // ARM pre-v5TE and Thumb1 do not have preload instructions.
   2614   if (!(Subtarget->isThumb2() ||
   2615         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
   2616     // Just preserve the chain.
   2617     return Op.getOperand(0);
   2618 
   2619   SDLoc dl(Op);
   2620   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
   2621   if (!isRead &&
   2622       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
   2623     // ARMv7 with MP extension has PLDW.
   2624     return Op.getOperand(0);
   2625 
   2626   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
   2627   if (Subtarget->isThumb()) {
   2628     // Invert the bits.
   2629     isRead = ~isRead & 1;
   2630     isData = ~isData & 1;
   2631   }
   2632 
   2633   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
   2634                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
   2635                      DAG.getConstant(isData, MVT::i32));
   2636 }
   2637 
   2638 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
   2639   MachineFunction &MF = DAG.getMachineFunction();
   2640   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
   2641 
   2642   // vastart just stores the address of the VarArgsFrameIndex slot into the
   2643   // memory location argument.
   2644   SDLoc dl(Op);
   2645   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2646   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   2647   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   2648   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
   2649                       MachinePointerInfo(SV), false, false, 0);
   2650 }
   2651 
   2652 SDValue
   2653 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
   2654                                         SDValue &Root, SelectionDAG &DAG,
   2655                                         SDLoc dl) const {
   2656   MachineFunction &MF = DAG.getMachineFunction();
   2657   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2658 
   2659   const TargetRegisterClass *RC;
   2660   if (AFI->isThumb1OnlyFunction())
   2661     RC = &ARM::tGPRRegClass;
   2662   else
   2663     RC = &ARM::GPRRegClass;
   2664 
   2665   // Transform the arguments stored in physical registers into virtual ones.
   2666   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2667   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2668 
   2669   SDValue ArgValue2;
   2670   if (NextVA.isMemLoc()) {
   2671     MachineFrameInfo *MFI = MF.getFrameInfo();
   2672     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
   2673 
   2674     // Create load node to retrieve arguments from the stack.
   2675     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2676     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
   2677                             MachinePointerInfo::getFixedStack(FI),
   2678                             false, false, false, 0);
   2679   } else {
   2680     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
   2681     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2682   }
   2683 
   2684   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
   2685 }
   2686 
   2687 void
   2688 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
   2689                                   unsigned InRegsParamRecordIdx,
   2690                                   unsigned ArgSize,
   2691                                   unsigned &ArgRegsSize,
   2692                                   unsigned &ArgRegsSaveSize)
   2693   const {
   2694   unsigned NumGPRs;
   2695   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
   2696     unsigned RBegin, REnd;
   2697     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
   2698     NumGPRs = REnd - RBegin;
   2699   } else {
   2700     unsigned firstUnalloced =
   2701         CCInfo.getFirstUnallocated(GPRArgRegs,
   2702                                    sizeof(GPRArgRegs) /
   2703                                    sizeof(GPRArgRegs[0]));
   2704     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
   2705   }
   2706 
   2707   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
   2708   ArgRegsSize = NumGPRs * 4;
   2709 
   2710   // If the parameter is split between the stack and GPRs...
   2711   if (NumGPRs && Align == 8 &&
   2712       (ArgRegsSize < ArgSize ||
   2713         InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) {
   2714     // Add padding for the part of the parameter recovered from GPRs, so
   2715     // that its last byte ends at an address of the form K*8 - 1.
   2716     // This is needed because the remaining (stack) part of the parameter
   2717     // is stack-aligned, and the "GPRs head" must be attached to it without
   2718     // gaps:
   2719     // Stack:
   2720     // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes...
   2721     // [ [padding] [GPRs head] ] [        Tail passed via stack       ....
   2722     //
   2723     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
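            // Worked example (illustrative): with Align == 8, three GPRs holding the
            // head (ArgRegsSize == 12) and nothing saved yet, Padding is
            // ((12 + 0 + 7) & ~7) - 12 == 4, so the 12-byte head starts 4 bytes into
            // its first 8-byte block and ends exactly on an 8-byte boundary.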
   2724     unsigned Padding =
   2725         ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) -
   2726         (ArgRegsSize + AFI->getArgRegsSaveSize());
   2727     ArgRegsSaveSize = ArgRegsSize + Padding;
   2728   } else
   2729     // We don't need to extend the register save area for byval parameters
   2730     // if they are passed via GPRs only.
   2731     ArgRegsSaveSize = ArgRegsSize;
   2732 }
   2733 
   2734 // The remaining GPRs hold either the beginning of variable-argument
   2735 // data, or the beginning of an aggregate passed by value (usually
   2736 // byval).  Either way, we allocate stack slots adjacent to the data
   2737 // provided by our caller, and store the unallocated registers there.
   2738 // If this is a variadic function, the va_list pointer will begin with
   2739 // these values; otherwise, this reassembles a (byval) structure that
   2740 // was split between registers and memory.
   2741 // Return: The frame index the registers were stored into.
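        // Illustrative example (hypothetical sizes): for a 20-byte byval aggregate
        // whose first 12 bytes were assigned to r1-r3 and whose remaining 8 bytes
        // arrive on the stack, this routine stores r1-r3 into slots directly below
        // the stack-passed tail so the whole object ends up contiguous in memory.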
   2742 int
   2743 ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
   2744                                   SDLoc dl, SDValue &Chain,
   2745                                   const Value *OrigArg,
   2746                                   unsigned InRegsParamRecordIdx,
   2747                                   unsigned OffsetFromOrigArg,
   2748                                   unsigned ArgOffset,
   2749                                   unsigned ArgSize,
   2750                                   bool ForceMutable) const {
   2751 
   2752   // Currently, two use-cases are possible:
   2753   // Case #1. Non-var-args function, and we meet the first byval parameter.
   2754   //          Set up the first unallocated register as the first byval
   2755   //          register and consume all remaining registers
   2756   //          (these two actions are performed by the HandleByVal method).
   2757   //          Then, here, we initialize the stack frame with
   2758   //          "store-reg" instructions.
   2759   // Case #2. Var-args function that doesn't contain byval parameters.
   2760   //          The same: consume all remaining unallocated registers and
   2761   //          initialize the stack frame.
   2762 
   2763   MachineFunction &MF = DAG.getMachineFunction();
   2764   MachineFrameInfo *MFI = MF.getFrameInfo();
   2765   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2766   unsigned firstRegToSaveIndex, lastRegToSaveIndex;
   2767   unsigned RBegin, REnd;
   2768   if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
   2769     CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
   2770     firstRegToSaveIndex = RBegin - ARM::R0;
   2771     lastRegToSaveIndex = REnd - ARM::R0;
   2772   } else {
   2773     firstRegToSaveIndex = CCInfo.getFirstUnallocated
   2774       (GPRArgRegs, array_lengthof(GPRArgRegs));
   2775     lastRegToSaveIndex = 4;
   2776   }
   2777 
   2778   unsigned ArgRegsSize, ArgRegsSaveSize;
   2779   computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize,
   2780                  ArgRegsSize, ArgRegsSaveSize);
   2781 
   2782   // Store any by-val regs to their spots on the stack so that they may be
   2783   // loaded by dereferencing the result of the formal parameter pointer or
   2784   // va_next. Note: once the stack area for byval/varargs registers has been
   2785   // initialized, it cannot be initialized again.
   2786   if (ArgRegsSaveSize) {
   2787 
   2788     unsigned Padding = ArgRegsSaveSize - ArgRegsSize;
   2789 
   2790     if (Padding) {
   2791       assert(AFI->getStoredByValParamsPadding() == 0 &&
   2792              "The only parameter may be padded.");
   2793       AFI->setStoredByValParamsPadding(Padding);
   2794     }
   2795 
   2796     int FrameIndex = MFI->CreateFixedObject(
   2797                       ArgRegsSaveSize,
   2798                       Padding + ArgOffset,
   2799                       false);
   2800     SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy());
   2801 
   2802     SmallVector<SDValue, 4> MemOps;
   2803     for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex;
   2804          ++firstRegToSaveIndex, ++i) {
   2805       const TargetRegisterClass *RC;
   2806       if (AFI->isThumb1OnlyFunction())
   2807         RC = &ARM::tGPRRegClass;
   2808       else
   2809         RC = &ARM::GPRRegClass;
   2810 
   2811       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
   2812       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
   2813       SDValue Store =
   2814         DAG.getStore(Val.getValue(1), dl, Val, FIN,
   2815                      MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
   2816                      false, false, 0);
   2817       MemOps.push_back(Store);
   2818       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
   2819                         DAG.getConstant(4, getPointerTy()));
   2820     }
   2821 
   2822     AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize());
   2823 
   2824     if (!MemOps.empty())
   2825       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   2826                           &MemOps[0], MemOps.size());
   2827     return FrameIndex;
   2828   } else
   2829     // This will point to the next argument passed via stack.
   2830     return MFI->CreateFixedObject(
   2831         4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable);
   2832 }
   2833 
   2834 // Set up the stack frame that the va_list pointer will start from.
   2835 void
   2836 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   2837                                         SDLoc dl, SDValue &Chain,
   2838                                         unsigned ArgOffset,
   2839                                         bool ForceMutable) const {
   2840   MachineFunction &MF = DAG.getMachineFunction();
   2841   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2842 
   2843   // Try to store any remaining integer argument regs
   2844   // to their spots on the stack so that they may be loaded by dereferencing
   2845   // the result of va_next.
   2846   // If there are no regs to be stored, just point to the address after the
   2847   // last argument passed via the stack.
   2848   int FrameIndex =
   2849     StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(),
   2850                    0, ArgOffset, 0, ForceMutable);
   2851 
   2852   AFI->setVarArgsFrameIndex(FrameIndex);
   2853 }
   2854 
   2855 SDValue
   2856 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   2857                                         CallingConv::ID CallConv, bool isVarArg,
   2858                                         const SmallVectorImpl<ISD::InputArg>
   2859                                           &Ins,
   2860                                         SDLoc dl, SelectionDAG &DAG,
   2861                                         SmallVectorImpl<SDValue> &InVals)
   2862                                           const {
   2863   MachineFunction &MF = DAG.getMachineFunction();
   2864   MachineFrameInfo *MFI = MF.getFrameInfo();
   2865 
   2866   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2867 
   2868   // Assign locations to all of the incoming arguments.
   2869   SmallVector<CCValAssign, 16> ArgLocs;
   2870   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2871                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
   2872   CCInfo.AnalyzeFormalArguments(Ins,
   2873                                 CCAssignFnForNode(CallConv, /* Return*/ false,
   2874                                                   isVarArg));
   2875 
   2876   SmallVector<SDValue, 16> ArgValues;
   2877   int lastInsIndex = -1;
   2878   SDValue ArgValue;
   2879   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
   2880   unsigned CurArgIdx = 0;
   2881 
   2882   // Initially ArgRegsSaveSize is zero.
   2883   // Then we increase this value each time we meet a byval parameter.
   2884   // We also increase this value in the case of a varargs function.
   2885   AFI->setArgRegsSaveSize(0);
   2886 
   2887   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   2888     CCValAssign &VA = ArgLocs[i];
   2889     std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
   2890     CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
   2891     // Arguments stored in registers.
   2892     if (VA.isRegLoc()) {
   2893       EVT RegVT = VA.getLocVT();
   2894 
   2895       if (VA.needsCustom()) {
   2896         // f64 and vector types are split up into multiple registers or
   2897         // combinations of registers and stack slots.
   2898         if (VA.getLocVT() == MVT::v2f64) {
   2899           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
   2900                                                    Chain, DAG, dl);
   2901           VA = ArgLocs[++i]; // skip ahead to next loc
   2902           SDValue ArgValue2;
   2903           if (VA.isMemLoc()) {
   2904             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
   2905             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2906             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
   2907                                     MachinePointerInfo::getFixedStack(FI),
   2908                                     false, false, false, 0);
   2909           } else {
   2910             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
   2911                                              Chain, DAG, dl);
   2912           }
   2913           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
   2914           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2915                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
   2916           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2917                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
   2918         } else
   2919           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
   2920 
   2921       } else {
   2922         const TargetRegisterClass *RC;
   2923 
   2924         if (RegVT == MVT::f32)
   2925           RC = &ARM::SPRRegClass;
   2926         else if (RegVT == MVT::f64)
   2927           RC = &ARM::DPRRegClass;
   2928         else if (RegVT == MVT::v2f64)
   2929           RC = &ARM::QPRRegClass;
   2930         else if (RegVT == MVT::i32)
   2931           RC = AFI->isThumb1OnlyFunction() ?
   2932             (const TargetRegisterClass*)&ARM::tGPRRegClass :
   2933             (const TargetRegisterClass*)&ARM::GPRRegClass;
   2934         else
   2935           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
   2936 
   2937         // Transform the arguments in physical registers into virtual ones.
   2938         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2939         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
   2940       }
   2941 
   2942       // If this is an 8 or 16-bit value, it is really passed promoted
   2943       // to 32 bits.  Insert an assert[sz]ext to capture this, then
   2944       // truncate to the right size.
   2945       switch (VA.getLocInfo()) {
   2946       default: llvm_unreachable("Unknown loc info!");
   2947       case CCValAssign::Full: break;
   2948       case CCValAssign::BCvt:
   2949         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
   2950         break;
   2951       case CCValAssign::SExt:
   2952         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
   2953                                DAG.getValueType(VA.getValVT()));
   2954         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2955         break;
   2956       case CCValAssign::ZExt:
   2957         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
   2958                                DAG.getValueType(VA.getValVT()));
   2959         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2960         break;
   2961       }
   2962 
   2963       InVals.push_back(ArgValue);
   2964 
   2965     } else { // VA.isRegLoc()
   2966 
   2967       // sanity check
   2968       assert(VA.isMemLoc());
   2969       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
   2970 
   2971       int index = ArgLocs[i].getValNo();
   2972 
   2973       // Some Ins[] entries become multiple ArgLoc[] entries.
   2974       // Process them only once.
   2975       if (index != lastInsIndex)
   2976         {
   2977           ISD::ArgFlagsTy Flags = Ins[index].Flags;
   2978           // FIXME: For now, all byval parameter objects are marked mutable.
   2979           // This can be changed with more analysis.
   2980           // In case of tail call optimization, mark all arguments mutable,
   2981           // since they could be overwritten by the lowering of arguments in
   2982           // the case of a tail call.
   2983           if (Flags.isByVal()) {
   2984             unsigned CurByValIndex = CCInfo.getInRegsParamsProceed();
   2985             int FrameIndex = StoreByValRegs(
   2986                 CCInfo, DAG, dl, Chain, CurOrigArg,
   2987                 CurByValIndex,
   2988                 Ins[VA.getValNo()].PartOffset,
   2989                 VA.getLocMemOffset(),
   2990                 Flags.getByValSize(),
   2991                 true /*force mutable frames*/);
   2992             InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy()));
   2993             CCInfo.nextInRegsParam();
   2994           } else {
   2995             unsigned FIOffset = VA.getLocMemOffset() +
   2996                                 AFI->getStoredByValParamsPadding();
   2997             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
   2998                                             FIOffset, true);
   2999 
   3000             // Create load nodes to retrieve arguments from the stack.
   3001             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   3002             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
   3003                                          MachinePointerInfo::getFixedStack(FI),
   3004                                          false, false, false, 0));
   3005           }
   3006           lastInsIndex = index;
   3007         }
   3008     }
   3009   }
   3010 
   3011   // varargs
   3012   if (isVarArg)
   3013     VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
   3014                          CCInfo.getNextStackOffset());
   3015 
   3016   return Chain;
   3017 }
   3018 
   3019 /// isFloatingPointZero - Return true if this is +0.0.
   3020 static bool isFloatingPointZero(SDValue Op) {
   3021   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
   3022     return CFP->getValueAPF().isPosZero();
   3023   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
   3024     // Maybe this has already been legalized into the constant pool?
   3025     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
   3026       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
   3027       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
   3028         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
   3029           return CFP->getValueAPF().isPosZero();
   3030     }
   3031   }
   3032   return false;
   3033 }
   3034 
   3035 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition code
   3036 /// for the given operands.
   3037 SDValue
   3038 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   3039                              SDValue &ARMcc, SelectionDAG &DAG,
   3040                              SDLoc dl) const {
   3041   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
   3042     unsigned C = RHSC->getZExtValue();
   3043     if (!isLegalICmpImmediate(C)) {
   3044       // Constant does not fit, try adjusting it by one?
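              // For example (illustrative): 0x101 is not a legal ARM modified
              // immediate, but 0x100 is, so an unsigned "x < 0x101" is rewritten
              // below as "x <= 0x100".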
   3045       switch (CC) {
   3046       default: break;
   3047       case ISD::SETLT:
   3048       case ISD::SETGE:
   3049         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
   3050           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
   3051           RHS = DAG.getConstant(C-1, MVT::i32);
   3052         }
   3053         break;
   3054       case ISD::SETULT:
   3055       case ISD::SETUGE:
   3056         if (C != 0 && isLegalICmpImmediate(C-1)) {
   3057           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
   3058           RHS = DAG.getConstant(C-1, MVT::i32);
   3059         }
   3060         break;
   3061       case ISD::SETLE:
   3062       case ISD::SETGT:
   3063         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
   3064           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
   3065           RHS = DAG.getConstant(C+1, MVT::i32);
   3066         }
   3067         break;
   3068       case ISD::SETULE:
   3069       case ISD::SETUGT:
   3070         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
   3071           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
   3072           RHS = DAG.getConstant(C+1, MVT::i32);
   3073         }
   3074         break;
   3075       }
   3076     }
   3077   }
   3078 
   3079   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   3080   ARMISD::NodeType CompareType;
   3081   switch (CondCode) {
   3082   default:
   3083     CompareType = ARMISD::CMP;
   3084     break;
   3085   case ARMCC::EQ:
   3086   case ARMCC::NE:
   3087     // Uses only Z Flag
   3088     CompareType = ARMISD::CMPZ;
   3089     break;
   3090   }
   3091   ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3092   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
   3093 }
   3094 
   3095 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
   3096 SDValue
   3097 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
   3098                              SDLoc dl) const {
   3099   SDValue Cmp;
   3100   if (!isFloatingPointZero(RHS))
   3101     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
   3102   else
   3103     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
   3104   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
   3105 }
   3106 
   3107 /// duplicateCmp - Glue values can have only one use, so this function
   3108 /// duplicates a comparison node.
   3109 SDValue
   3110 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
   3111   unsigned Opc = Cmp.getOpcode();
   3112   SDLoc DL(Cmp);
   3113   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
   3114     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   3115 
   3116   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
   3117   Cmp = Cmp.getOperand(0);
   3118   Opc = Cmp.getOpcode();
   3119   if (Opc == ARMISD::CMPFP)
   3120     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   3121   else {
   3122     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
   3123     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
   3124   }
   3125   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
   3126 }
   3127 
   3128 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   3129   SDValue Cond = Op.getOperand(0);
   3130   SDValue SelectTrue = Op.getOperand(1);
   3131   SDValue SelectFalse = Op.getOperand(2);
   3132   SDLoc dl(Op);
   3133 
   3134   // Convert:
   3135   //
   3136   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
   3137   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
   3138   //
   3139   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
   3140     const ConstantSDNode *CMOVTrue =
   3141       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
   3142     const ConstantSDNode *CMOVFalse =
   3143       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
   3144 
   3145     if (CMOVTrue && CMOVFalse) {
   3146       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
   3147       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
   3148 
   3149       SDValue True;
   3150       SDValue False;
   3151       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
   3152         True = SelectTrue;
   3153         False = SelectFalse;
   3154       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
   3155         True = SelectFalse;
   3156         False = SelectTrue;
   3157       }
   3158 
   3159       if (True.getNode() && False.getNode()) {
   3160         EVT VT = Op.getValueType();
   3161         SDValue ARMcc = Cond.getOperand(2);
   3162         SDValue CCR = Cond.getOperand(3);
   3163         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
   3164         assert(True.getValueType() == VT);
   3165         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
   3166       }
   3167     }
   3168   }
   3169 
   3170   // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
   3171   // undefined bits before doing a full-word comparison with zero.
   3172   Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
   3173                      DAG.getConstant(1, Cond.getValueType()));
   3174 
   3175   return DAG.getSelectCC(dl, Cond,
   3176                          DAG.getConstant(0, Cond.getValueType()),
   3177                          SelectTrue, SelectFalse, ISD::SETNE);
   3178 }
   3179 
   3180 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   3181   EVT VT = Op.getValueType();
   3182   SDValue LHS = Op.getOperand(0);
   3183   SDValue RHS = Op.getOperand(1);
   3184   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   3185   SDValue TrueVal = Op.getOperand(2);
   3186   SDValue FalseVal = Op.getOperand(3);
   3187   SDLoc dl(Op);
   3188 
   3189   if (LHS.getValueType() == MVT::i32) {
   3190     SDValue ARMcc;
   3191     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3192     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   3193     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
   3194   }
   3195 
   3196   ARMCC::CondCodes CondCode, CondCode2;
   3197   FPCCToARMCC(CC, CondCode, CondCode2);
   3198 
   3199   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3200   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   3201   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3202   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
   3203                                ARMcc, CCR, Cmp);
   3204   if (CondCode2 != ARMCC::AL) {
   3205     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
   3206     // FIXME: Needs another CMP because flag can have but one use.
   3207     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
   3208     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
   3209                          Result, TrueVal, ARMcc2, CCR, Cmp2);
   3210   }
   3211   return Result;
   3212 }
   3213 
   3214 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
   3215 /// to morph to an integer compare sequence.
   3216 static bool canChangeToInt(SDValue Op, bool &SeenZero,
   3217                            const ARMSubtarget *Subtarget) {
   3218   SDNode *N = Op.getNode();
   3219   if (!N->hasOneUse())
   3220     // Otherwise it requires moving the value from fp to integer registers.
   3221     return false;
   3222   if (!N->getNumValues())
   3223     return false;
   3224   EVT VT = Op.getValueType();
   3225   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
   3226     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
   3227     // vmrs are very slow, e.g. cortex-a8.
   3228     return false;
   3229 
   3230   if (isFloatingPointZero(Op)) {
   3231     SeenZero = true;
   3232     return true;
   3233   }
   3234   return ISD::isNormalLoad(N);
   3235 }
   3236 
   3237 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
   3238   if (isFloatingPointZero(Op))
   3239     return DAG.getConstant(0, MVT::i32);
   3240 
   3241   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
   3242     return DAG.getLoad(MVT::i32, SDLoc(Op),
   3243                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
   3244                        Ld->isVolatile(), Ld->isNonTemporal(),
   3245                        Ld->isInvariant(), Ld->getAlignment());
   3246 
   3247   llvm_unreachable("Unknown VFP cmp argument!");
   3248 }
   3249 
   3250 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
   3251                            SDValue &RetVal1, SDValue &RetVal2) {
   3252   if (isFloatingPointZero(Op)) {
   3253     RetVal1 = DAG.getConstant(0, MVT::i32);
   3254     RetVal2 = DAG.getConstant(0, MVT::i32);
   3255     return;
   3256   }
   3257 
   3258   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
   3259     SDValue Ptr = Ld->getBasePtr();
   3260     RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op),
   3261                           Ld->getChain(), Ptr,
   3262                           Ld->getPointerInfo(),
   3263                           Ld->isVolatile(), Ld->isNonTemporal(),
   3264                           Ld->isInvariant(), Ld->getAlignment());
   3265 
   3266     EVT PtrType = Ptr.getValueType();
   3267     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
   3268     SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op),
   3269                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
   3270     RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op),
   3271                           Ld->getChain(), NewPtr,
   3272                           Ld->getPointerInfo().getWithOffset(4),
   3273                           Ld->isVolatile(), Ld->isNonTemporal(),
   3274                           Ld->isInvariant(), NewAlign);
   3275     return;
   3276   }
   3277 
   3278   llvm_unreachable("Unknown VFP cmp argument!");
   3279 }
   3280 
   3281 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
   3282 /// f32 and even f64 comparisons to integer ones.
   3283 SDValue
   3284 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
   3285   SDValue Chain = Op.getOperand(0);
   3286   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   3287   SDValue LHS = Op.getOperand(2);
   3288   SDValue RHS = Op.getOperand(3);
   3289   SDValue Dest = Op.getOperand(4);
   3290   SDLoc dl(Op);
   3291 
   3292   bool LHSSeenZero = false;
   3293   bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
   3294   bool RHSSeenZero = false;
   3295   bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
   3296   if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
   3297     // If unsafe fp math optimization is enabled and there are no other uses of
   3298     // the CMP operands, and the condition code is EQ or NE, we can optimize it
   3299     // to an integer comparison.
   3300     if (CC == ISD::SETOEQ)
   3301       CC = ISD::SETEQ;
   3302     else if (CC == ISD::SETUNE)
   3303       CC = ISD::SETNE;
   3304 
   3305     SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
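            // One operand is known to be +0.0 (checked above), so clearing the sign
            // bit of both values lets the integer compare treat -0.0 and +0.0 as
            // equal.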
   3306     SDValue ARMcc;
   3307     if (LHS.getValueType() == MVT::f32) {
   3308       LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
   3309                         bitcastf32Toi32(LHS, DAG), Mask);
   3310       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
   3311                         bitcastf32Toi32(RHS, DAG), Mask);
   3312       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   3313       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3314       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   3315                          Chain, Dest, ARMcc, CCR, Cmp);
   3316     }
   3317 
   3318     SDValue LHS1, LHS2;
   3319     SDValue RHS1, RHS2;
   3320     expandf64Toi32(LHS, DAG, LHS1, LHS2);
   3321     expandf64Toi32(RHS, DAG, RHS1, RHS2);
   3322     LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
   3323     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
   3324     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   3325     ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3326     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   3327     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
   3328     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
   3329   }
   3330 
   3331   return SDValue();
   3332 }
   3333 
   3334 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   3335   SDValue Chain = Op.getOperand(0);
   3336   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   3337   SDValue LHS = Op.getOperand(2);
   3338   SDValue RHS = Op.getOperand(3);
   3339   SDValue Dest = Op.getOperand(4);
   3340   SDLoc dl(Op);
   3341 
   3342   if (LHS.getValueType() == MVT::i32) {
   3343     SDValue ARMcc;
   3344     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   3345     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3346     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   3347                        Chain, Dest, ARMcc, CCR, Cmp);
   3348   }
   3349 
   3350   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
   3351 
   3352   if (getTargetMachine().Options.UnsafeFPMath &&
   3353       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
   3354        CC == ISD::SETNE || CC == ISD::SETUNE)) {
   3355     SDValue Result = OptimizeVFPBrcond(Op, DAG);
   3356     if (Result.getNode())
   3357       return Result;
   3358   }
   3359 
   3360   ARMCC::CondCodes CondCode, CondCode2;
   3361   FPCCToARMCC(CC, CondCode, CondCode2);
   3362 
   3363   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3364   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   3365   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3366   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   3367   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
   3368   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   3369   if (CondCode2 != ARMCC::AL) {
   3370     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
   3371     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
   3372     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   3373   }
   3374   return Res;
   3375 }
   3376 
   3377 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   3378   SDValue Chain = Op.getOperand(0);
   3379   SDValue Table = Op.getOperand(1);
   3380   SDValue Index = Op.getOperand(2);
   3381   SDLoc dl(Op);
   3382 
   3383   EVT PTy = getPointerTy();
   3384   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
   3385   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
   3386   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
   3387   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
   3388   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
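          // Each jump-table entry is 4 bytes, so the entry address is computed
          // below as Table + Index * 4.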
   3389   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
   3390   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
   3391   if (Subtarget->isThumb2()) {
   3392     // Thumb2 uses a two-level jump. That is, it jumps into the jump table,
   3393     // which does another jump to the destination. This also makes it easier
   3394     // to translate it to TBB / TBH later.
   3395     // FIXME: This might not work if the function is extremely large.
   3396     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
   3397                        Addr, Op.getOperand(2), JTI, UId);
   3398   }
   3399   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
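            // For PIC, each jump-table entry holds an offset relative to the table
            // base, so load the entry and add the table address back in.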
   3400     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
   3401                        MachinePointerInfo::getJumpTable(),
   3402                        false, false, false, 0);
   3403     Chain = Addr.getValue(1);
   3404     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
   3405     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   3406   } else {
   3407     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
   3408                        MachinePointerInfo::getJumpTable(),
   3409                        false, false, false, 0);
   3410     Chain = Addr.getValue(1);
   3411     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   3412   }
   3413 }
   3414 
   3415 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   3416   EVT VT = Op.getValueType();
   3417   SDLoc dl(Op);
   3418 
   3419   if (Op.getValueType().getVectorElementType() == MVT::i32) {
   3420     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
   3421       return Op;
   3422     return DAG.UnrollVectorOp(Op.getNode());
   3423   }
   3424 
   3425   assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
   3426          "Invalid type for custom lowering!");
   3427   if (VT != MVT::v4i16)
   3428     return DAG.UnrollVectorOp(Op.getNode());
   3429 
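          // v4f32 -> v4i16: NEON converts between f32 and i32 lanes, so go
          // through v4i32 and then truncate each lane down to i16.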
   3430   Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
   3431   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
   3432 }
   3433 
   3434 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   3435   EVT VT = Op.getValueType();
   3436   if (VT.isVector())
   3437     return LowerVectorFP_TO_INT(Op, DAG);
   3438 
   3439   SDLoc dl(Op);
   3440   unsigned Opc;
   3441 
   3442   switch (Op.getOpcode()) {
   3443   default: llvm_unreachable("Invalid opcode!");
   3444   case ISD::FP_TO_SINT:
   3445     Opc = ARMISD::FTOSI;
   3446     break;
   3447   case ISD::FP_TO_UINT:
   3448     Opc = ARMISD::FTOUI;
   3449     break;
   3450   }
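          // FTOSI / FTOUI leave the converted integer in a floating-point register
          // (modelled here as an f32 value), so bitcast the result to i32.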
   3451   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
   3452   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
   3453 }
   3454 
   3455 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   3456   EVT VT = Op.getValueType();
   3457   SDLoc dl(Op);
   3458 
   3459   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
   3460     if (VT.getVectorElementType() == MVT::f32)
   3461       return Op;
   3462     return DAG.UnrollVectorOp(Op.getNode());
   3463   }
   3464 
   3465   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
   3466          "Invalid type for custom lowering!");
   3467   if (VT != MVT::v4f32)
   3468     return DAG.UnrollVectorOp(Op.getNode());
   3469 
   3470   unsigned CastOpc;
   3471   unsigned Opc;
   3472   switch (Op.getOpcode()) {
   3473   default: llvm_unreachable("Invalid opcode!");
   3474   case ISD::SINT_TO_FP:
   3475     CastOpc = ISD::SIGN_EXTEND;
   3476     Opc = ISD::SINT_TO_FP;
   3477     break;
   3478   case ISD::UINT_TO_FP:
   3479     CastOpc = ISD::ZERO_EXTEND;
   3480     Opc = ISD::UINT_TO_FP;
   3481     break;
   3482   }
   3483 
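          // v4i16 -> v4f32: widen to v4i32 with the matching sign/zero extend,
          // then convert the widened integer vector to f32.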
   3484   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
   3485   return DAG.getNode(Opc, dl, VT, Op);
   3486 }
   3487 
   3488 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   3489   EVT VT = Op.getValueType();
   3490   if (VT.isVector())
   3491     return LowerVectorINT_TO_FP(Op, DAG);
   3492 
   3493   SDLoc dl(Op);
   3494   unsigned Opc;
   3495 
   3496   switch (Op.getOpcode()) {
   3497   default: llvm_unreachable("Invalid opcode!");
   3498   case ISD::SINT_TO_FP:
   3499     Opc = ARMISD::SITOF;
   3500     break;
   3501   case ISD::UINT_TO_FP:
   3502     Opc = ARMISD::UITOF;
   3503     break;
   3504   }
   3505 
   3506   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   3507   return DAG.getNode(Opc, dl, VT, Op);
   3508 }
   3509 
   3510 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   3511   // Implement fcopysign by combining the sign bit of operand 1 with the
          // value bits of operand 0, using NEON bit operations when available and
          // integer bit twiddling otherwise.
   3512   SDValue Tmp0 = Op.getOperand(0);
   3513   SDValue Tmp1 = Op.getOperand(1);
   3514   SDLoc dl(Op);
   3515   EVT VT = Op.getValueType();
   3516   EVT SrcVT = Tmp1.getValueType();
   3517   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
   3518     Tmp0.getOpcode() == ARMISD::VMOVDRR;
   3519   bool UseNEON = !InGPR && Subtarget->hasNEON();
   3520 
   3521   if (UseNEON) {
   3522     // Use VBSL to copy the sign bit.
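            // The net effect is a bit-select on the sign bit:
            //   Res = (Tmp1 & SignMask) | (Tmp0 & ~SignMask)
            // where SignMask has only the sign bit of the destination type set.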
   3523     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
   3524     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
   3525                                DAG.getTargetConstant(EncodedVal, MVT::i32));
   3526     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
   3527     if (VT == MVT::f64)
   3528       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3529                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
   3530                          DAG.getConstant(32, MVT::i32));
   3531     else /*if (VT == MVT::f32)*/
   3532       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
   3533     if (SrcVT == MVT::f32) {
   3534       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
   3535       if (VT == MVT::f64)
   3536         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3537                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
   3538                            DAG.getConstant(32, MVT::i32));
   3539     } else if (VT == MVT::f32)
   3540       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
   3541                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
   3542                          DAG.getConstant(32, MVT::i32));
   3543     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
   3544     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
   3545 
   3546     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
   3547                                             MVT::i32);
   3548     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
   3549     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
   3550                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
   3551 
   3552     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
   3553                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
   3554                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
   3555     if (VT == MVT::f32) {
   3556       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
   3557       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
   3558                         DAG.getConstant(0, MVT::i32));
   3559     } else {
   3560       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
   3561     }
   3562 
   3563     return Res;
   3564   }
   3565 
   3566   // Bitcast operand 1 to i32.
   3567   if (SrcVT == MVT::f64)
   3568     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3569                        &Tmp1, 1).getValue(1);
   3570   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
   3571 
   3572   // Or in the signbit with integer operations.
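          // i.e. result = (Tmp0 & 0x7fffffff) | (Tmp1 & 0x80000000), applied to
          // the single word for f32 or to the high word for f64.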
   3573   SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
   3574   SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
   3575   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
   3576   if (VT == MVT::f32) {
   3577     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
   3578                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
   3579     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
   3580                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   3581   }
   3582 
   3583   // f64: Or the high part with signbit and then combine two parts.
   3584   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3585                      &Tmp0, 1);
   3586   SDValue Lo = Tmp0.getValue(0);
   3587   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
   3588   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
   3589   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   3590 }
   3591 
   3592 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   3593   MachineFunction &MF = DAG.getMachineFunction();
   3594   MachineFrameInfo *MFI = MF.getFrameInfo();
   3595   MFI->setReturnAddressIsTaken(true);
   3596 
   3597   EVT VT = Op.getValueType();
   3598   SDLoc dl(Op);
   3599   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3600   if (Depth) {
   3601     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
   3602     SDValue Offset = DAG.getConstant(4, MVT::i32);
   3603     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
   3604                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
   3605                        MachinePointerInfo(), false, false, false, 0);
   3606   }
   3607 
   3608   // Return LR, which contains the return address. Mark it an implicit live-in.
   3609   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   3610   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   3611 }
   3612 
   3613 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   3614   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   3615   MFI->setFrameAddressIsTaken(true);
   3616 
   3617   EVT VT = Op.getValueType();
   3618   SDLoc dl(Op);  // FIXME probably not meaningful
   3619   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3620   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
   3621     ? ARM::R7 : ARM::R11;
   3622   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   3623   while (Depth--)
   3624     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
   3625                             MachinePointerInfo(),
   3626                             false, false, false, 0);
   3627   return FrameAddr;
   3628 }
   3629 
   3630 /// Custom-expand long vector extensions, where size(DestVec) > 2*size(SrcVec)
   3631 /// and size(DestVec) > 128 bits.
   3632 /// This is achieved by doing one extension from the SrcVec, splitting the
   3633 /// result, extending those parts, and then concatenating them into the
   3634 /// destination.
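        /// For example, a hypothetical v8i8 -> v8i32 extension would first be
        /// extended to v8i16, split into two v4i16 halves, each half extended to
        /// v4i32, and the two halves concatenated into the v8i32 result.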
   3635 static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
   3636   SDValue Op = N->getOperand(0);
   3637   EVT SrcVT = Op.getValueType();
   3638   EVT DestVT = N->getValueType(0);
   3639 
   3640   assert(DestVT.getSizeInBits() > 128 &&
   3641          "Custom sext/zext expansion needs >128-bit vector.");
   3642   // If this is a normal length extension, use the default expansion.
   3643   if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
   3644       SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
   3645     return SDValue();
   3646 
   3647   SDLoc dl(N);
   3648   unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
   3649   unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
   3650   unsigned NumElts = SrcVT.getVectorNumElements();
   3651   LLVMContext &Ctx = *DAG.getContext();
   3652   SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;
   3653 
   3654   EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
   3655                                NumElts);
   3656   EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
   3657                                  NumElts/2);
   3658   EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
   3659                                NumElts/2);
   3660 
   3661   Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
   3662   SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
   3663                         DAG.getIntPtrConstant(0));
   3664   SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
   3665                         DAG.getIntPtrConstant(NumElts/2));
   3666   ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
   3667   ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
   3668   return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
   3669 }
   3670 
   3671 /// ExpandBITCAST - If the target supports VFP, this function is called to
   3672 /// expand a bit convert where either the source or destination type is i64 to
   3673 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
   3674 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
   3675 /// vectors), since the legalizer won't know what to do with that.
   3676 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
   3677   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   3678   SDLoc dl(N);
   3679   SDValue Op = N->getOperand(0);
   3680 
   3681   // This function is only supposed to be called for i64 types, either as the
   3682   // source or destination of the bit convert.
   3683   EVT SrcVT = Op.getValueType();
   3684   EVT DstVT = N->getValueType(0);
   3685   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
   3686          "ExpandBITCAST called for non-i64 type");
   3687 
   3688   // Turn i64->f64 into VMOVDRR.
   3689   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
   3690     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3691                              DAG.getConstant(0, MVT::i32));
   3692     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3693                              DAG.getConstant(1, MVT::i32));
   3694     return DAG.getNode(ISD::BITCAST, dl, DstVT,
   3695                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
   3696   }
   3697 
   3698   // Turn f64->i64 into VMOVRRD.
   3699   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
   3700     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
   3701                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
   3702     // Merge the pieces into a single i64 value.
   3703     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
   3704   }
   3705 
   3706   return SDValue();
   3707 }
   3708 
   3709 /// getZeroVector - Returns a vector of specified type with all zero elements.
   3710 /// Zero vectors are used to represent vector negation and in those cases
   3711 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
   3712 /// not support i64 elements, so sometimes the zero vectors will need to be
   3713 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
   3714 /// zero vector.
   3715 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
   3716   assert(VT.isVector() && "Expected a vector type");
   3717   // The canonical modified immediate encoding of a zero vector is....0!
   3718   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
   3719   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
   3720   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
   3721   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   3722 }
   3723 
   3724 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
   3725 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
   3726 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   3727                                                 SelectionDAG &DAG) const {
   3728   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3729   EVT VT = Op.getValueType();
   3730   unsigned VTBits = VT.getSizeInBits();
   3731   SDLoc dl(Op);
   3732   SDValue ShOpLo = Op.getOperand(0);
   3733   SDValue ShOpHi = Op.getOperand(1);
   3734   SDValue ShAmt  = Op.getOperand(2);
   3735   SDValue ARMcc;
   3736   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
   3737 
   3738   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
   3739 
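          // For ShAmt < VTBits the result is Lo = (ShOpLo >>u ShAmt) |
          // (ShOpHi << (VTBits - ShAmt)) and Hi = ShOpHi >> ShAmt; for
          // ShAmt >= VTBits the low word is instead ShOpHi >> (ShAmt - VTBits),
          // and the CMOV below selects between the two cases.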
   3740   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3741                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3742   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
   3743   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3744                                    DAG.getConstant(VTBits, MVT::i32));
   3745   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
   3746   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3747   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
   3748 
   3749   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3750   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3751                           ARMcc, DAG, dl);
   3752   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
   3753   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
   3754                            CCR, Cmp);
   3755 
   3756   SDValue Ops[2] = { Lo, Hi };
   3757   return DAG.getMergeValues(Ops, 2, dl);
   3758 }
   3759 
   3760 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
   3761 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
   3762 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   3763                                                SelectionDAG &DAG) const {
   3764   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3765   EVT VT = Op.getValueType();
   3766   unsigned VTBits = VT.getSizeInBits();
   3767   SDLoc dl(Op);
   3768   SDValue ShOpLo = Op.getOperand(0);
   3769   SDValue ShOpHi = Op.getOperand(1);
   3770   SDValue ShAmt  = Op.getOperand(2);
   3771   SDValue ARMcc;
   3772 
   3773   assert(Op.getOpcode() == ISD::SHL_PARTS);
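          // Mirror image of LowerShiftRightParts: the high result collects bits
          // shifted out of the low word, and a CMOV handles ShAmt >= VTBits.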
   3774   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3775                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3776   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
   3777   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3778                                    DAG.getConstant(VTBits, MVT::i32));
   3779   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
   3780   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
   3781 
   3782   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3783   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3784   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3785                           ARMcc, DAG, dl);
   3786   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
   3787   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
   3788                            CCR, Cmp);
   3789 
   3790   SDValue Ops[2] = { Lo, Hi };
   3791   return DAG.getMergeValues(Ops, 2, dl);
   3792 }
   3793 
   3794 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   3795                                             SelectionDAG &DAG) const {
   3796   // The rounding mode is in bits 23:22 of the FPSCR.
   3797   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
   3798   // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
   3799   // so that the shift and the mask get folded into a bitfield extract.
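          // For example, FPSCR rounding mode 3 (round toward zero) becomes
          // (3 + 1) & 3 == 0, which is the FLT_ROUNDS value for round toward zero.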
   3800   SDLoc dl(Op);
   3801   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
   3802                               DAG.getConstant(Intrinsic::arm_get_fpscr,
   3803                                               MVT::i32));
   3804   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
   3805                                   DAG.getConstant(1U << 22, MVT::i32));
   3806   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
   3807                               DAG.getConstant(22, MVT::i32));
   3808   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
   3809                      DAG.getConstant(3, MVT::i32));
   3810 }
   3811 
   3812 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
   3813                          const ARMSubtarget *ST) {
   3814   EVT VT = N->getValueType(0);
   3815   SDLoc dl(N);
   3816 
   3817   if (!ST->hasV6T2Ops())
   3818     return SDValue();
   3819 
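          // cttz(x) is computed as ctlz(rbit(x)): bit-reverse the value with the
          // v6T2 RBIT instruction and count leading zeros instead.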
   3820   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
   3821   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
   3822 }
   3823 
   3824 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
   3825 /// for each 16-bit element from operand, repeated.  The basic idea is to
   3826 /// leverage vcnt to get the 8-bit counts, gather and add the results.
   3827 ///
   3828 /// Trace for v4i16:
   3829 /// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
   3830 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
   3831 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
   3832 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
   3833 ///            [b0 b1 b2 b3 b4 b5 b6 b7]
   3834 ///           +[b1 b0 b3 b2 b5 b4 b7 b6]
   3835 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
   3836 /// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
   3837 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
   3838   EVT VT = N->getValueType(0);
   3839   SDLoc DL(N);
   3840 
   3841   EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
   3842   SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
   3843   SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
   3844   SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
   3845   SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
   3846   return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
   3847 }
   3848 
   3849 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
   3850 /// bit-count for each 16-bit element from the operand.  We need slightly
   3851 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
   3852 /// 64/128-bit registers.
   3853 ///
   3854 /// Trace for v4i16:
   3855 /// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
   3856 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
   3857 /// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
   3858 /// v4i16:Extracted = [k0    k1    k2    k3    ]
   3859 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
   3860   EVT VT = N->getValueType(0);
   3861   SDLoc DL(N);
   3862 
   3863   SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
   3864   if (VT.is64BitVector()) {
   3865     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
   3866     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
   3867                        DAG.getIntPtrConstant(0));
   3868   } else {
   3869     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
   3870                                     BitCounts, DAG.getIntPtrConstant(0));
   3871     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
   3872   }
   3873 }
   3874 
   3875 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
   3876 /// bit-count for each 32-bit element from the operand.  The idea here is
   3877 /// to split the vector into 16-bit elements, leverage the 16-bit count
   3878 /// routine, and then combine the results.
   3879 ///
   3880 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
   3881 /// input    = [v0    v1    ] (vi: 32-bit elements)
   3882 /// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
   3883 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
   3884 /// vrev: N0 = [k1 k0 k3 k2 ]
   3885 ///            [k0 k1 k2 k3 ]
   3886 ///       N1 =+[k1 k0 k3 k2 ]
   3887 ///            [k0 k2 k1 k3 ]
   3888 ///       N2 =+[k1 k3 k0 k2 ]
   3889 ///            [k0    k2    k1    k3    ]
   3890 /// Extended =+[k1    k3    k0    k2    ]
   3891 ///            [k0    k2    ]
   3892 /// Extracted=+[k1    k3    ]
   3893 ///
   3894 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
   3895   EVT VT = N->getValueType(0);
   3896   SDLoc DL(N);
   3897 
   3898   EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
   3899 
   3900   SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
   3901   SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
   3902   SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
   3903   SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
   3904   SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
   3905 
   3906   if (VT.is64BitVector()) {
   3907     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
   3908     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
   3909                        DAG.getIntPtrConstant(0));
   3910   } else {
   3911     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
   3912                                     DAG.getIntPtrConstant(0));
   3913     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
   3914   }
   3915 }
   3916 
   3917 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
   3918                           const ARMSubtarget *ST) {
   3919   EVT VT = N->getValueType(0);
   3920 
   3921   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
   3922   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
   3923           VT == MVT::v4i16 || VT == MVT::v8i16) &&
   3924          "Unexpected type for custom ctpop lowering");
   3925 
   3926   if (VT.getVectorElementType() == MVT::i32)
   3927     return lowerCTPOP32BitElements(N, DAG);
   3928   else
   3929     return lowerCTPOP16BitElements(N, DAG);
   3930 }
   3931 
   3932 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
   3933                           const ARMSubtarget *ST) {
   3934   EVT VT = N->getValueType(0);
   3935   SDLoc dl(N);
   3936 
   3937   if (!VT.isVector())
   3938     return SDValue();
   3939 
   3940   // Lower vector shifts on NEON to use VSHL.
   3941   assert(ST->hasNEON() && "unexpected vector shift");
   3942 
   3943   // Left shifts translate directly to the vshiftu intrinsic.
   3944   if (N->getOpcode() == ISD::SHL)
   3945     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3946                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
   3947                        N->getOperand(0), N->getOperand(1));
   3948 
   3949   assert((N->getOpcode() == ISD::SRA ||
   3950           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
   3951 
   3952   // NEON uses the same intrinsics for both left and right shifts.  For
   3953   // right shifts, the shift amounts are negative, so negate the vector of
   3954   // shift amounts.
   3955   EVT ShiftVT = N->getOperand(1).getValueType();
   3956   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
   3957                                      getZeroVector(ShiftVT, DAG, dl),
   3958                                      N->getOperand(1));
   3959   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
   3960                              Intrinsic::arm_neon_vshifts :
   3961                              Intrinsic::arm_neon_vshiftu);
   3962   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3963                      DAG.getConstant(vshiftInt, MVT::i32),
   3964                      N->getOperand(0), NegatedCount);
   3965 }
   3966 
   3967 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
   3968                                 const ARMSubtarget *ST) {
   3969   EVT VT = N->getValueType(0);
   3970   SDLoc dl(N);
   3971 
   3972   // We can get here for a node like i32 = ISD::SHL i32, i64
   3973   if (VT != MVT::i64)
   3974     return SDValue();
   3975 
   3976   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
   3977          "Unknown shift to lower!");
   3978 
   3979   // We only lower SRA and SRL by 1 here; all others use generic lowering.
   3980   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
   3981       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
   3982     return SDValue();
   3983 
   3984   // If we are in thumb mode, we don't have RRX.
   3985   if (ST->isThumb1Only()) return SDValue();
   3986 
   3987   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
   3988   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
   3989                            DAG.getConstant(0, MVT::i32));
   3990   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
   3991                            DAG.getConstant(1, MVT::i32));
   3992 
   3993   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
   3994   // captures the result into a carry flag.
   3995   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
   3996   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
   3997 
   3998   // The low part is an ARMISD::RRX operand, which shifts the carry in.
   3999   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
   4000 
   4001   // Merge the pieces into a single i64 value.
   4002   return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
   4003 }
   4004 
   4005 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   4006   SDValue TmpOp0, TmpOp1;
   4007   bool Invert = false;
   4008   bool Swap = false;
   4009   unsigned Opc = 0;
   4010 
   4011   SDValue Op0 = Op.getOperand(0);
   4012   SDValue Op1 = Op.getOperand(1);
   4013   SDValue CC = Op.getOperand(2);
   4014   EVT VT = Op.getValueType();
   4015   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   4016   SDLoc dl(Op);
   4017 
   4018   if (Op.getOperand(1).