      1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the interfaces that ARM uses to lower LLVM code into a
     11 // selection DAG.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "arm-isel"
     16 #include "ARMISelLowering.h"
     17 #include "ARM.h"
     18 #include "ARMCallingConv.h"
     19 #include "ARMConstantPoolValue.h"
     20 #include "ARMMachineFunctionInfo.h"
     21 #include "ARMPerfectShuffle.h"
     22 #include "ARMSubtarget.h"
     23 #include "ARMTargetMachine.h"
     24 #include "ARMTargetObjectFile.h"
     25 #include "MCTargetDesc/ARMAddressingModes.h"
     26 #include "llvm/ADT/Statistic.h"
     27 #include "llvm/ADT/StringExtras.h"
     28 #include "llvm/CodeGen/CallingConvLower.h"
     29 #include "llvm/CodeGen/IntrinsicLowering.h"
     30 #include "llvm/CodeGen/MachineBasicBlock.h"
     31 #include "llvm/CodeGen/MachineFrameInfo.h"
     32 #include "llvm/CodeGen/MachineFunction.h"
     33 #include "llvm/CodeGen/MachineInstrBuilder.h"
     34 #include "llvm/CodeGen/MachineModuleInfo.h"
     35 #include "llvm/CodeGen/MachineRegisterInfo.h"
     36 #include "llvm/CodeGen/SelectionDAG.h"
     37 #include "llvm/IR/CallingConv.h"
     38 #include "llvm/IR/Constants.h"
     39 #include "llvm/IR/Function.h"
     40 #include "llvm/IR/GlobalValue.h"
     41 #include "llvm/IR/Instruction.h"
     42 #include "llvm/IR/Instructions.h"
     43 #include "llvm/IR/Intrinsics.h"
     44 #include "llvm/IR/Type.h"
     45 #include "llvm/MC/MCSectionMachO.h"
     46 #include "llvm/Support/CommandLine.h"
     47 #include "llvm/Support/ErrorHandling.h"
     48 #include "llvm/Support/MathExtras.h"
     49 #include "llvm/Support/raw_ostream.h"
     50 #include "llvm/Target/TargetOptions.h"
     51 using namespace llvm;
     52 
     53 STATISTIC(NumTailCalls, "Number of tail calls");
     54 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
     55 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
     56 
     57 // This option should go away when tail calls fully work.
     58 static cl::opt<bool>
     59 EnableARMTailCalls("arm-tail-calls", cl::Hidden,
     60   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
     61   cl::init(false));
     62 
     63 cl::opt<bool>
     64 EnableARMLongCalls("arm-long-calls", cl::Hidden,
     65   cl::desc("Generate calls via indirect call instructions"),
     66   cl::init(false));
     67 
     68 static cl::opt<bool>
     69 ARMInterworking("arm-interworking", cl::Hidden,
     70   cl::desc("Enable / disable ARM interworking (for debugging only)"),
     71   cl::init(true));
     72 
     73 namespace {
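           // ARMCCState extends CCState with a note of whether it is being used to
           // lower a call or a function prologue (formal arguments); parts of the
           // ARM calling-convention lowering need to know which of the two contexts
           // they are operating in.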
     74   class ARMCCState : public CCState {
     75   public:
     76     ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
     77                const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
     78                LLVMContext &C, ParmContext PC)
     79         : CCState(CC, isVarArg, MF, TM, locs, C) {
     80       assert(((PC == Call) || (PC == Prologue)) &&
      81              "ARMCCState users must specify whether their context is call "
      82              "or prologue generation.");
     83       CallOrPrologue = PC;
     84     }
     85   };
     86 }
     87 
     88 // The APCS parameter registers.
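         // Under APCS/AAPCS, r0-r3 carry the first four words of integer arguments;
         // anything that does not fit in them is passed on the stack.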
     89 static const uint16_t GPRArgRegs[] = {
     90   ARM::R0, ARM::R1, ARM::R2, ARM::R3
     91 };
     92 
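         // addTypeForNEON - Set up the operation actions that are common to every
         // NEON vector type: custom lowering for element insert/extract and
         // shuffles, expansion of division and remainder, and promotion of
         // loads/stores and integer bitwise operations to the given "promoted"
         // types.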
     93 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
     94                                        MVT PromotedBitwiseVT) {
     95   if (VT != PromotedLdStVT) {
     96     setOperationAction(ISD::LOAD, VT, Promote);
     97     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
     98 
     99     setOperationAction(ISD::STORE, VT, Promote);
    100     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
    101   }
    102 
    103   MVT ElemTy = VT.getVectorElementType();
    104   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    105     setOperationAction(ISD::SETCC, VT, Custom);
    106   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    107   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    108   if (ElemTy == MVT::i32) {
    109     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    110     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    111     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    112     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
    113   } else {
    114     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    115     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    116     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    117     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    118   }
    119   setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
    120   setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
    121   setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
    122   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
    123   setOperationAction(ISD::SELECT,            VT, Expand);
    124   setOperationAction(ISD::SELECT_CC,         VT, Expand);
    125   setOperationAction(ISD::VSELECT,           VT, Expand);
    126   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    127   if (VT.isInteger()) {
    128     setOperationAction(ISD::SHL, VT, Custom);
    129     setOperationAction(ISD::SRA, VT, Custom);
    130     setOperationAction(ISD::SRL, VT, Custom);
    131   }
    132 
    133   // Promote all bit-wise operations.
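           // (Promotion here means the operands are bitcast to PromotedBitwiseVT,
           // the operation is performed in that type, and the result is bitcast
           // back; e.g. an AND of v8i8 is actually done as an AND of v2i32.)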
    134   if (VT.isInteger() && VT != PromotedBitwiseVT) {
    135     setOperationAction(ISD::AND, VT, Promote);
    136     AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    137     setOperationAction(ISD::OR,  VT, Promote);
    138     AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    139     setOperationAction(ISD::XOR, VT, Promote);
    140     AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
    141   }
    142 
    143   // Neon does not support vector divide/remainder operations.
    144   setOperationAction(ISD::SDIV, VT, Expand);
    145   setOperationAction(ISD::UDIV, VT, Expand);
    146   setOperationAction(ISD::FDIV, VT, Expand);
    147   setOperationAction(ISD::SREM, VT, Expand);
    148   setOperationAction(ISD::UREM, VT, Expand);
    149   setOperationAction(ISD::FREM, VT, Expand);
    150 }
    151 
    152 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
    153   addRegisterClass(VT, &ARM::DPRRegClass);
    154   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
    155 }
    156 
    157 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
    158   addRegisterClass(VT, &ARM::QPRRegClass);
    159   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
    160 }
    161 
    162 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
    163   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    164     return new TargetLoweringObjectFileMachO();
    165 
    166   return new ARMElfTargetObjectFile();
    167 }
    168 
    169 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    170     : TargetLowering(TM, createTLOF(TM)) {
    171   Subtarget = &TM.getSubtarget<ARMSubtarget>();
    172   RegInfo = TM.getRegisterInfo();
    173   Itins = TM.getInstrItineraryData();
    174 
    175   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
    176 
    177   if (Subtarget->isTargetDarwin()) {
    178     // Uses VFP for Thumb libfuncs if available.
    179     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
    180       // Single-precision floating-point arithmetic.
    181       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
    182       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
    183       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
    184       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
    185 
    186       // Double-precision floating-point arithmetic.
    187       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
    188       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
    189       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
    190       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
    191 
    192       // Single-precision comparisons.
    193       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
    194       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
    195       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
    196       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
    197       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
    198       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
    199       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
    200       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
    201 
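               // Each of these helpers returns 1 when the tested relation holds and
               // 0 otherwise, so the condition codes below tell the legalizer to
               // test the call result against zero; O_F32 reuses the unordered
               // helper and simply inverts the sense of the test.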
    202       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    203       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
    204       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    205       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    206       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    207       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    208       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    209       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    210 
    211       // Double-precision comparisons.
    212       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
    213       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
    214       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
    215       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
    216       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
    217       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
    218       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
    219       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
    220 
    221       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    222       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
    223       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    224       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    225       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    226       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    227       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    228       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    229 
    230       // Floating-point to integer conversions.
    231       // i64 conversions are done via library routines even when generating VFP
    232       // instructions, so use the same ones.
    233       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
    234       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
    235       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
    236       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
    237 
    238       // Conversions between floating types.
    239       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
    240       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
    241 
    242       // Integer to floating-point conversions.
    243       // i64 conversions are done via library routines even when generating VFP
    244       // instructions, so use the same ones.
    245       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
    246       // e.g., __floatunsidf vs. __floatunssidfvfp.
    247       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
    248       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
    249       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
    250       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    251     }
    252   }
    253 
     254   // These libcalls are not available in 32-bit mode.
    255   setLibcallName(RTLIB::SHL_I128, 0);
    256   setLibcallName(RTLIB::SRL_I128, 0);
    257   setLibcallName(RTLIB::SRA_I128, 0);
    258 
    259   if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    260     // Double-precision floating-point arithmetic helper functions
    261     // RTABI chapter 4.1.2, Table 2
    262     setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    263     setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    264     setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    265     setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    266     setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    267     setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    268     setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    269     setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
    270 
    271     // Double-precision floating-point comparison helper functions
    272     // RTABI chapter 4.1.2, Table 3
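             // The __aeabi_dcmp* helpers return a nonzero value when the relation
             // holds, so each ordered comparison is tested with SETNE against zero;
             // UNE reuses __aeabi_dcmpeq with the sense of the test inverted.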
    273     setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    274     setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    275     setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    276     setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    277     setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    278     setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    279     setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    280     setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    281     setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    282     setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    283     setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    284     setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    285     setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    286     setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    287     setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    288     setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    289     setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    290     setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    291     setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    292     setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    293     setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    294     setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    295     setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    296     setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
    297 
    298     // Single-precision floating-point arithmetic helper functions
    299     // RTABI chapter 4.1.2, Table 4
    300     setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    301     setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    302     setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    303     setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    304     setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    305     setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    306     setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    307     setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
    308 
    309     // Single-precision floating-point comparison helper functions
    310     // RTABI chapter 4.1.2, Table 5
    311     setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    312     setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    313     setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    314     setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    315     setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    316     setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    317     setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    318     setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    319     setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    320     setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    321     setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    322     setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    323     setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    324     setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    325     setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    326     setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    327     setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    328     setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    329     setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    330     setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    331     setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    332     setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    333     setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    334     setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
    335 
    336     // Floating-point to integer conversions.
    337     // RTABI chapter 4.1.2, Table 6
    338     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    339     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    340     setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    341     setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    342     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    343     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    344     setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    345     setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    346     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    347     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    348     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    349     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    350     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    351     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    352     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    353     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
    354 
    355     // Conversions between floating types.
    356     // RTABI chapter 4.1.2, Table 7
    357     setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    358     setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    359     setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    360     setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
    361 
    362     // Integer to floating-point conversions.
    363     // RTABI chapter 4.1.2, Table 8
    364     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    365     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    366     setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    367     setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    368     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    369     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    370     setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    371     setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    372     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    373     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    374     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    375     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    376     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    377     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    378     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    379     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    380 
    381     // Long long helper functions
    382     // RTABI chapter 4.2, Table 9
    383     setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    384     setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    385     setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    386     setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    387     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    388     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    389     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    390     setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    391     setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    392     setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
    393 
    394     // Integer division functions
    395     // RTABI chapter 4.3.1
    396     setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    397     setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    398     setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    399     setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    400     setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    401     setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    402     setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    403     setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    404     setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    405     setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    406     setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    407     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    408     setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    409     setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    410     setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    411     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    412 
    413     // Memory operations
    414     // RTABI chapter 4.3.4
    415     setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    416     setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    417     setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    418     setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    419     setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    420     setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
    421   }
    422 
    423   // Use divmod compiler-rt calls for iOS 5.0 and later.
    424   if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
    425       !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    426     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    427     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
    428   }
    429 
    430   if (Subtarget->isThumb1Only())
    431     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
    432   else
    433     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
    434   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    435       !Subtarget->isThumb1Only()) {
    436     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    437     if (!Subtarget->isFPOnlySP())
    438       addRegisterClass(MVT::f64, &ARM::DPRRegClass);
    439 
    440     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    441   }
    442 
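           // Start by expanding all vector truncating stores and all extending
           // loads of vector types; the combinations NEON can handle natively are
           // switched back to Legal further down when the subtarget has NEON.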
    443   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    444        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    445     for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    446          InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
    447       setTruncStoreAction((MVT::SimpleValueType)VT,
    448                           (MVT::SimpleValueType)InnerVT, Expand);
    449     setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    450     setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    451     setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
    452   }
    453 
    454   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
    455 
    456   if (Subtarget->hasNEON()) {
    457     addDRTypeForNEON(MVT::v2f32);
    458     addDRTypeForNEON(MVT::v8i8);
    459     addDRTypeForNEON(MVT::v4i16);
    460     addDRTypeForNEON(MVT::v2i32);
    461     addDRTypeForNEON(MVT::v1i64);
    462 
    463     addQRTypeForNEON(MVT::v4f32);
    464     addQRTypeForNEON(MVT::v2f64);
    465     addQRTypeForNEON(MVT::v16i8);
    466     addQRTypeForNEON(MVT::v8i16);
    467     addQRTypeForNEON(MVT::v4i32);
    468     addQRTypeForNEON(MVT::v2i64);
    469 
    470     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
     471     // neither Neon nor VFP supports any arithmetic operations on it.
     472     // The same applies to v4f32, but keep in mind that vadd, vsub and vmul
     473     // are natively supported for v4f32.
    474     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    475     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    476     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    477     // FIXME: Code duplication: FDIV and FREM are expanded always, see
    478     // ARMTargetLowering::addTypeForNEON method for details.
    479     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    480     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    481     // FIXME: Create unittest.
     482     // In other words, find a case where "copysign" appears in a DAG with
     483     // vector operands.
    484     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    485     // FIXME: Code duplication: SETCC has custom operation action, see
    486     // ARMTargetLowering::addTypeForNEON method for details.
    487     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    488     // FIXME: Create unittest for FNEG and for FABS.
    489     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    490     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    491     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    492     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    493     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    494     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    495     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    496     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    497     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    498     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    499     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    500     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    501     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    502     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    503     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    504     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    505     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    506     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    507     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
    508 
    509     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    510     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    511     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    512     setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    513     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    514     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    515     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    516     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    517     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    518     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    519     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    520     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    521     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    522     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    523     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
    524 
    525     // Mark v2f32 intrinsics.
    526     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    527     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    528     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    529     setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    530     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    531     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    532     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    533     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    534     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    535     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    536     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    537     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    538     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    539     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    540     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
    541 
    542     // Neon does not support some operations on v1i64 and v2i64 types.
    543     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    544     // Custom handling for some quad-vector types to detect VMULL.
    545     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    546     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    547     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    548     // Custom handling for some vector types to avoid expensive expansions
    549     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    550     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    551     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    552     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    553     setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    554     setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
     555     // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
     556     // a destination type that is wider than the source, nor does it have
     557     // an FP_TO_[SU]INT instruction with a destination narrower than its
     558     // source.
    559     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    560     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    561     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    562     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
    563 
    564     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
    565     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
    566 
     567     // NEON does not have a single-instruction CTPOP for vectors with element
     568     // types wider than 8 bits.  However, custom lowering can leverage the
     569     // v8i8/v16i8 vcnt instruction.
    570     setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
    571     setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
    572     setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
    573     setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
    574 
    575     // NEON only has FMA instructions as of VFP4.
    576     if (!Subtarget->hasVFP4()) {
    577       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
    578       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    579     }
    580 
    581     setTargetDAGCombine(ISD::INTRINSIC_VOID);
    582     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    583     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    584     setTargetDAGCombine(ISD::SHL);
    585     setTargetDAGCombine(ISD::SRL);
    586     setTargetDAGCombine(ISD::SRA);
    587     setTargetDAGCombine(ISD::SIGN_EXTEND);
    588     setTargetDAGCombine(ISD::ZERO_EXTEND);
    589     setTargetDAGCombine(ISD::ANY_EXTEND);
    590     setTargetDAGCombine(ISD::SELECT_CC);
    591     setTargetDAGCombine(ISD::BUILD_VECTOR);
    592     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    593     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    594     setTargetDAGCombine(ISD::STORE);
    595     setTargetDAGCombine(ISD::FP_TO_SINT);
    596     setTargetDAGCombine(ISD::FP_TO_UINT);
    597     setTargetDAGCombine(ISD::FDIV);
    598 
    599     // It is legal to extload from v4i8 to v4i16 or v4i32.
    600     MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
    601                   MVT::v4i16, MVT::v2i16,
    602                   MVT::v2i32};
    603     for (unsigned i = 0; i < 6; ++i) {
    604       setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
    605       setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
    606       setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    607     }
    608   }
    609 
    610   // ARM and Thumb2 support UMLAL/SMLAL.
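           // The ADDC combine is what recognizes a 64-bit add of a 32x32->64
           // multiply (built from ADDC/ADDE pairs after type legalization) and
           // turns it into a single UMLAL/SMLAL node.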
    611   if (!Subtarget->isThumb1Only())
    612     setTargetDAGCombine(ISD::ADDC);
    613 
    614 
    615   computeRegisterProperties();
    616 
    617   // ARM does not have f32 extending load.
    618   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
    619 
    620   // ARM does not have i1 sign extending load.
    621   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
    622 
    623   // ARM supports all 4 flavors of integer indexed load / store.
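           // (pre-increment, pre-decrement, post-increment and post-decrement,
           //  e.g. "ldr r0, [r1, #4]!" and "ldr r0, [r1], #4").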
    624   if (!Subtarget->isThumb1Only()) {
    625     for (unsigned im = (unsigned)ISD::PRE_INC;
    626          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
    627       setIndexedLoadAction(im,  MVT::i1,  Legal);
    628       setIndexedLoadAction(im,  MVT::i8,  Legal);
    629       setIndexedLoadAction(im,  MVT::i16, Legal);
    630       setIndexedLoadAction(im,  MVT::i32, Legal);
    631       setIndexedStoreAction(im, MVT::i1,  Legal);
    632       setIndexedStoreAction(im, MVT::i8,  Legal);
    633       setIndexedStoreAction(im, MVT::i16, Legal);
    634       setIndexedStoreAction(im, MVT::i32, Legal);
    635     }
    636   }
    637 
    638   // i64 operation support.
    639   setOperationAction(ISD::MUL,     MVT::i64, Expand);
    640   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
    641   if (Subtarget->isThumb1Only()) {
    642     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    643     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    644   }
    645   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
    646       || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    647     setOperationAction(ISD::MULHS, MVT::i32, Expand);
    648 
    649   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    650   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    651   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
    652   setOperationAction(ISD::SRL,       MVT::i64, Custom);
    653   setOperationAction(ISD::SRA,       MVT::i64, Custom);
    654 
    655   if (!Subtarget->isThumb1Only()) {
    656     // FIXME: We should do this for Thumb1 as well.
    657     setOperationAction(ISD::ADDC,    MVT::i32, Custom);
    658     setOperationAction(ISD::ADDE,    MVT::i32, Custom);
    659     setOperationAction(ISD::SUBC,    MVT::i32, Custom);
    660     setOperationAction(ISD::SUBE,    MVT::i32, Custom);
    661   }
    662 
    663   // ARM does not have ROTL.
    664   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
    665   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
    666   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    667   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    668     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
    669 
    670   // These just redirect to CTTZ and CTLZ on ARM.
    671   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
    672   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
    673 
     674   // BSWAP (the REV instruction) is only available on ARMv6 and later.
    675   if (!Subtarget->hasV6Ops())
    676     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
    677 
    678   if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
    679       !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
     680     // These are expanded into libcalls if the CPU doesn't have a HW divider.
    681     setOperationAction(ISD::SDIV,  MVT::i32, Expand);
    682     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
    683   }
    684   setOperationAction(ISD::SREM,  MVT::i32, Expand);
    685   setOperationAction(ISD::UREM,  MVT::i32, Expand);
    686   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    687   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    688 
    689   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
    690   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
    691   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
    692   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
    693   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
    694 
    695   setOperationAction(ISD::TRAP, MVT::Other, Legal);
    696 
    697   // Use the default implementation.
    698   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
    699   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
    700   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
    701   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
    702   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
    703   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
    704 
    705   if (!Subtarget->isTargetDarwin()) {
    706     // Non-Darwin platforms may return values in these registers via the
    707     // personality function.
    708     setOperationAction(ISD::EHSELECTION,      MVT::i32,   Expand);
    709     setOperationAction(ISD::EXCEPTIONADDR,    MVT::i32,   Expand);
    710     setExceptionPointerRegister(ARM::R0);
    711     setExceptionSelectorRegister(ARM::R1);
    712   }
    713 
    714   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
    715   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
    716   // the default expansion.
    717   // FIXME: This should be checking for v6k, not just v6.
    718   if (Subtarget->hasDataBarrier() ||
    719       (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    720     // membarrier needs custom lowering; the rest are legal and handled
    721     // normally.
    722     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
    723     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    724     // Custom lowering for 64-bit ops
    725     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
    726     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
    727     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
    728     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
    729     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
    730     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Custom);
    731     setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i64, Custom);
    732     setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i64, Custom);
    733     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
    734     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
    735     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
     736     // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    737     setInsertFencesForAtomic(true);
    738   } else {
    739     // Set them all for expansion, which will force libcalls.
    740     setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    741     setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
    742     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    743     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    744     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    745     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    746     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    747     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    748     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    749     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    750     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    751     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    752     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    753     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    754     // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    755     // Unordered/Monotonic case.
    756     setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    757     setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    758     // Since the libcalls include locking, fold in the fences
    759     setShouldFoldAtomicFences(true);
    760   }
    761 
    762   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
    763 
    764   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
    765   if (!Subtarget->hasV6Ops()) {
    766     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    767     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
    768   }
    769   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
    770 
    771   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    772       !Subtarget->isThumb1Only()) {
     773     // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
     774     // iff the target supports VFP2.
    775     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    776     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
    777   }
    778 
    779   // We want to custom lower some of our intrinsics.
    780   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
    781   if (Subtarget->isTargetDarwin()) {
    782     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    783     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    784     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
    785   }
    786 
    787   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
    788   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
    789   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
    790   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
    791   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
    792   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
    793   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
    794   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    795   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
    796 
    797   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
    798   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
    799   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
    800   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
    801   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
    802 
    803   // We don't support sin/cos/fmod/copysign/pow
    804   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
    805   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
    806   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
    807   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
    808   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
    809   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
    810   setOperationAction(ISD::FREM,      MVT::f64, Expand);
    811   setOperationAction(ISD::FREM,      MVT::f32, Expand);
    812   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    813       !Subtarget->isThumb1Only()) {
    814     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    815     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
    816   }
    817   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
    818   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
    819 
    820   if (!Subtarget->hasVFP4()) {
    821     setOperationAction(ISD::FMA, MVT::f64, Expand);
    822     setOperationAction(ISD::FMA, MVT::f32, Expand);
    823   }
    824 
    825   // Various VFP goodness
    826   if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    827     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    828     if (Subtarget->hasVFP2()) {
    829       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    830       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    831       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    832       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    833     }
    834     // Special handling for half-precision FP.
    835     if (!Subtarget->hasFP16()) {
    836       setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
    837       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    838     }
    839   }
    840 
    841   // We have target-specific dag combine patterns for the following nodes:
    842   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
    843   setTargetDAGCombine(ISD::ADD);
    844   setTargetDAGCombine(ISD::SUB);
    845   setTargetDAGCombine(ISD::MUL);
    846   setTargetDAGCombine(ISD::AND);
    847   setTargetDAGCombine(ISD::OR);
    848   setTargetDAGCombine(ISD::XOR);
    849 
    850   if (Subtarget->hasV6Ops())
    851     setTargetDAGCombine(ISD::SRL);
    852 
    853   setStackPointerRegisterToSaveRestore(ARM::SP);
    854 
    855   if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
    856       !Subtarget->hasVFP2())
    857     setSchedulingPreference(Sched::RegPressure);
    858   else
    859     setSchedulingPreference(Sched::Hybrid);
    860 
    861   //// temporary - rewrite interface to use type
    862   MaxStoresPerMemset = 8;
    863   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
    864   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
    865   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
    866   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
    867   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
    868 
    869   // On ARM arguments smaller than 4 bytes are extended, so all arguments
    870   // are at least 4 bytes aligned.
    871   setMinStackArgumentAlignment(4);
    872 
    873   BenefitFromCodePlacementOpt = true;
    874 
    875   // Prefer likely predicted branches to selects on out-of-order cores.
    876   PredictableSelectIsExpensive = Subtarget->isLikeA9();
    877 
    878   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
    879 }
    880 
    881 // FIXME: It might make sense to define the representative register class as the
    882 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
     883 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
     884 // SPR's representative would be DPR_VFP2. This should work well if register
     885 // pressure tracking were modified such that a register use would increment the
     886 // pressure of the register class's representative and all of its superclasses'
     887 // representatives transitively. We have not implemented this because of the
     888 // difficulty, prior to coalescing, of modeling operand register classes due to
     889 // the common occurrence of cross-class copies and subregister insertions and
     890 // extractions.
    891 std::pair<const TargetRegisterClass*, uint8_t>
    892 ARMTargetLowering::findRepresentativeClass(MVT VT) const{
    893   const TargetRegisterClass *RRC = 0;
    894   uint8_t Cost = 1;
    895   switch (VT.SimpleTy) {
    896   default:
    897     return TargetLowering::findRepresentativeClass(VT);
     898   // Use DPR as the representative register class for all floating-point
     899   // and vector types. Since there are 32 SPR registers and 32 DPR registers,
     900   // the cost is 1 for both f32 and f64.
    901   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
    902   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    903     RRC = &ARM::DPRRegClass;
    904     // When NEON is used for SP, only half of the register file is available
    905     // because operations that define both SP and DP results will be constrained
    906     // to the VFP2 class (D0-D15). We currently model this constraint prior to
    907     // coalescing by double-counting the SP regs. See the FIXME above.
    908     if (Subtarget->useNEONForSinglePrecisionFP())
    909       Cost = 2;
    910     break;
    911   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
    912   case MVT::v4f32: case MVT::v2f64:
    913     RRC = &ARM::DPRRegClass;
    914     Cost = 2;
    915     break;
    916   case MVT::v4i64:
    917     RRC = &ARM::DPRRegClass;
    918     Cost = 4;
    919     break;
    920   case MVT::v8i64:
    921     RRC = &ARM::DPRRegClass;
    922     Cost = 8;
    923     break;
    924   }
    925   return std::make_pair(RRC, Cost);
    926 }
    927 
    928 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
    929   switch (Opcode) {
    930   default: return 0;
    931   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
    932   case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
    933   case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
    934   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
    935   case ARMISD::CALL:          return "ARMISD::CALL";
    936   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
    937   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
    938   case ARMISD::tCALL:         return "ARMISD::tCALL";
    939   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
    940   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
    941   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
    942   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
    943   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
    944   case ARMISD::CMP:           return "ARMISD::CMP";
    945   case ARMISD::CMN:           return "ARMISD::CMN";
    946   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
    947   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
    948   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
    949   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
    950   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
    951 
    952   case ARMISD::CMOV:          return "ARMISD::CMOV";
    953 
    954   case ARMISD::RBIT:          return "ARMISD::RBIT";
    955 
    956   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
    957   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
    958   case ARMISD::SITOF:         return "ARMISD::SITOF";
    959   case ARMISD::UITOF:         return "ARMISD::UITOF";
    960 
    961   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
    962   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
    963   case ARMISD::RRX:           return "ARMISD::RRX";
    964 
    965   case ARMISD::ADDC:          return "ARMISD::ADDC";
    966   case ARMISD::ADDE:          return "ARMISD::ADDE";
    967   case ARMISD::SUBC:          return "ARMISD::SUBC";
    968   case ARMISD::SUBE:          return "ARMISD::SUBE";
    969 
    970   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
    971   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
    972 
    973   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
    974   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
    975 
    976   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
    977 
    978   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
    979 
    980   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
    981 
    982   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
    983   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
    984 
    985   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
    986 
    987   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
    988   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
    989   case ARMISD::VCGE:          return "ARMISD::VCGE";
    990   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
    991   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
    992   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
    993   case ARMISD::VCGT:          return "ARMISD::VCGT";
    994   case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
    995   case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
    996   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
    997   case ARMISD::VTST:          return "ARMISD::VTST";
    998 
    999   case ARMISD::VSHL:          return "ARMISD::VSHL";
   1000   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
   1001   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
   1002   case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
   1003   case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
   1004   case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
   1005   case ARMISD::VSHRN:         return "ARMISD::VSHRN";
   1006   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
   1007   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
   1008   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
   1009   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
   1010   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
   1011   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
   1012   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
   1013   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
   1014   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
   1015   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
   1016   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
   1017   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
   1018   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
   1019   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
   1020   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
   1021   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
   1022   case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
   1023   case ARMISD::VDUP:          return "ARMISD::VDUP";
   1024   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
   1025   case ARMISD::VEXT:          return "ARMISD::VEXT";
   1026   case ARMISD::VREV64:        return "ARMISD::VREV64";
   1027   case ARMISD::VREV32:        return "ARMISD::VREV32";
   1028   case ARMISD::VREV16:        return "ARMISD::VREV16";
   1029   case ARMISD::VZIP:          return "ARMISD::VZIP";
   1030   case ARMISD::VUZP:          return "ARMISD::VUZP";
   1031   case ARMISD::VTRN:          return "ARMISD::VTRN";
   1032   case ARMISD::VTBL1:         return "ARMISD::VTBL1";
   1033   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
   1034   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
   1035   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
   1036   case ARMISD::UMLAL:         return "ARMISD::UMLAL";
   1037   case ARMISD::SMLAL:         return "ARMISD::SMLAL";
   1038   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
   1039   case ARMISD::FMAX:          return "ARMISD::FMAX";
   1040   case ARMISD::FMIN:          return "ARMISD::FMIN";
   1041   case ARMISD::BFI:           return "ARMISD::BFI";
   1042   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
   1043   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
   1044   case ARMISD::VBSL:          return "ARMISD::VBSL";
   1045   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
   1046   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
   1047   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
   1048   case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
   1049   case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
   1050   case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
   1051   case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
   1052   case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
   1053   case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
   1054   case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
   1055   case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
   1056   case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
   1057   case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
   1058   case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
   1059   case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
   1060   case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
   1061   case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
   1062   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
   1063   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
   1064   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
   1065   }
   1066 }
   1067 
   1068 EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
   1069   if (!VT.isVector()) return getPointerTy();
   1070   return VT.changeVectorElementTypeToInteger();
   1071 }
   1072 
   1073 /// getRegClassFor - Return the register class that should be used for the
   1074 /// specified value type.
   1075 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
   1076   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   1077   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
   1078   // load / store 4 to 8 consecutive D registers.
   1079   if (Subtarget->hasNEON()) {
   1080     if (VT == MVT::v4i64)
   1081       return &ARM::QQPRRegClass;
   1082     if (VT == MVT::v8i64)
   1083       return &ARM::QQQQPRRegClass;
   1084   }
   1085   return TargetLowering::getRegClassFor(VT);
   1086 }
   1087 
   1088 // Create a fast isel object.
   1089 FastISel *
   1090 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
   1091                                   const TargetLibraryInfo *libInfo) const {
   1092   return ARM::createFastISel(funcInfo, libInfo);
   1093 }
   1094 
   1095 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
   1096 /// be used for loads / stores from the global.
   1097 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
   1098   return (Subtarget->isThumb1Only() ? 127 : 4095);
   1099 }
   1100 
   1101 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   1102   unsigned NumVals = N->getNumValues();
   1103   if (!NumVals)
   1104     return Sched::RegPressure;
   1105 
   1106   for (unsigned i = 0; i != NumVals; ++i) {
   1107     EVT VT = N->getValueType(i);
   1108     if (VT == MVT::Glue || VT == MVT::Other)
   1109       continue;
   1110     if (VT.isFloatingPoint() || VT.isVector())
   1111       return Sched::ILP;
   1112   }
   1113 
   1114   if (!N->isMachineOpcode())
   1115     return Sched::RegPressure;
   1116 
    1117   // Loads are scheduled for latency even if the instruction itinerary
   1118   // is not available.
   1119   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   1120   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
   1121 
   1122   if (MCID.getNumDefs() == 0)
   1123     return Sched::RegPressure;
   1124   if (!Itins->isEmpty() &&
   1125       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
   1126     return Sched::ILP;
   1127 
   1128   return Sched::RegPressure;
   1129 }
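
         // Illustrative behaviour of the hook above (not an exhaustive rule):
         // a machine node whose first def has an operand latency greater than
         // two cycles in the itinerary -- typically a load -- is scheduled for
         // ILP to hide that latency, while a node with no defs or no itinerary
         // data falls back to register-pressure scheduling.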
   1130 
   1131 //===----------------------------------------------------------------------===//
   1132 // Lowering Code
   1133 //===----------------------------------------------------------------------===//
   1134 
   1135 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
   1136 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
   1137   switch (CC) {
   1138   default: llvm_unreachable("Unknown condition code!");
   1139   case ISD::SETNE:  return ARMCC::NE;
   1140   case ISD::SETEQ:  return ARMCC::EQ;
   1141   case ISD::SETGT:  return ARMCC::GT;
   1142   case ISD::SETGE:  return ARMCC::GE;
   1143   case ISD::SETLT:  return ARMCC::LT;
   1144   case ISD::SETLE:  return ARMCC::LE;
   1145   case ISD::SETUGT: return ARMCC::HI;
   1146   case ISD::SETUGE: return ARMCC::HS;
   1147   case ISD::SETULT: return ARMCC::LO;
   1148   case ISD::SETULE: return ARMCC::LS;
   1149   }
   1150 }
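
         // Example of the mapping above: an integer (setcc a, b, setult) is
         // lowered to a compare followed by an operation predicated on
         // ARMCC::LO (unsigned lower), whereas the signed setlt form uses
         // ARMCC::LT.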
   1151 
   1152 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
   1153 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
   1154                         ARMCC::CondCodes &CondCode2) {
   1155   CondCode2 = ARMCC::AL;
   1156   switch (CC) {
   1157   default: llvm_unreachable("Unknown FP condition!");
   1158   case ISD::SETEQ:
   1159   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
   1160   case ISD::SETGT:
   1161   case ISD::SETOGT: CondCode = ARMCC::GT; break;
   1162   case ISD::SETGE:
   1163   case ISD::SETOGE: CondCode = ARMCC::GE; break;
   1164   case ISD::SETOLT: CondCode = ARMCC::MI; break;
   1165   case ISD::SETOLE: CondCode = ARMCC::LS; break;
   1166   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
   1167   case ISD::SETO:   CondCode = ARMCC::VC; break;
   1168   case ISD::SETUO:  CondCode = ARMCC::VS; break;
   1169   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
   1170   case ISD::SETUGT: CondCode = ARMCC::HI; break;
   1171   case ISD::SETUGE: CondCode = ARMCC::PL; break;
   1172   case ISD::SETLT:
   1173   case ISD::SETULT: CondCode = ARMCC::LT; break;
   1174   case ISD::SETLE:
   1175   case ISD::SETULE: CondCode = ARMCC::LE; break;
   1176   case ISD::SETNE:
   1177   case ISD::SETUNE: CondCode = ARMCC::NE; break;
   1178   }
   1179 }
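
         // Example of the mapping above: SETOEQ needs a single check (EQ),
         // while SETONE has no single ARM condition and is expressed as two
         // predicates (MI, then GT); callers check whether CondCode2 is still
         // AL to decide if a second predicated check is needed.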
   1180 
   1181 //===----------------------------------------------------------------------===//
   1182 //                      Calling Convention Implementation
   1183 //===----------------------------------------------------------------------===//
   1184 
   1185 #include "ARMGenCallingConv.inc"
   1186 
    1187 /// CCAssignFnForNode - Selects the correct CCAssignFn for the
   1188 /// given CallingConvention value.
   1189 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
   1190                                                  bool Return,
   1191                                                  bool isVarArg) const {
   1192   switch (CC) {
   1193   default:
   1194     llvm_unreachable("Unsupported calling convention");
   1195   case CallingConv::Fast:
   1196     if (Subtarget->hasVFP2() && !isVarArg) {
   1197       if (!Subtarget->isAAPCS_ABI())
   1198         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
    1199       // For AAPCS ABI targets, just use the VFP variant of the calling convention.
   1200       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1201     }
   1202     // Fallthrough
   1203   case CallingConv::C: {
   1204     // Use target triple & subtarget features to do actual dispatch.
   1205     if (!Subtarget->isAAPCS_ABI())
   1206       return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   1207     else if (Subtarget->hasVFP2() &&
   1208              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
   1209              !isVarArg)
   1210       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1211     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
   1212   }
   1213   case CallingConv::ARM_AAPCS_VFP:
   1214     if (!isVarArg)
   1215       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1216     // Fallthrough
   1217   case CallingConv::ARM_AAPCS:
   1218     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
   1219   case CallingConv::ARM_APCS:
   1220     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   1221   case CallingConv::GHC:
   1222     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
   1223   }
   1224 }
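
         // Illustrative selection, assuming an AAPCS target with VFP2:
         //   CCAssignFnForNode(CallingConv::C, /*Return=*/false, /*isVarArg=*/false)
         // yields CC_ARM_AAPCS_VFP when the float ABI is hard and CC_ARM_AAPCS
         // otherwise; a vararg call always falls back to the base AAPCS
         // convention.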
   1225 
   1226 /// LowerCallResult - Lower the result values of a call into the
    1227 /// appropriate copies out of the physical registers.
   1228 SDValue
   1229 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   1230                                    CallingConv::ID CallConv, bool isVarArg,
   1231                                    const SmallVectorImpl<ISD::InputArg> &Ins,
   1232                                    DebugLoc dl, SelectionDAG &DAG,
   1233                                    SmallVectorImpl<SDValue> &InVals) const {
   1234 
   1235   // Assign locations to each value returned by this call.
   1236   SmallVector<CCValAssign, 16> RVLocs;
   1237   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1238                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
   1239   CCInfo.AnalyzeCallResult(Ins,
   1240                            CCAssignFnForNode(CallConv, /* Return*/ true,
   1241                                              isVarArg));
   1242 
   1243   // Copy all of the result registers out of their specified physreg.
   1244   for (unsigned i = 0; i != RVLocs.size(); ++i) {
   1245     CCValAssign VA = RVLocs[i];
   1246 
   1247     SDValue Val;
   1248     if (VA.needsCustom()) {
   1249       // Handle f64 or half of a v2f64.
   1250       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
   1251                                       InFlag);
   1252       Chain = Lo.getValue(1);
   1253       InFlag = Lo.getValue(2);
   1254       VA = RVLocs[++i]; // skip ahead to next loc
   1255       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
   1256                                       InFlag);
   1257       Chain = Hi.getValue(1);
   1258       InFlag = Hi.getValue(2);
   1259       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   1260 
   1261       if (VA.getLocVT() == MVT::v2f64) {
   1262         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
   1263         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
   1264                           DAG.getConstant(0, MVT::i32));
   1265 
   1266         VA = RVLocs[++i]; // skip ahead to next loc
   1267         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
   1268         Chain = Lo.getValue(1);
   1269         InFlag = Lo.getValue(2);
   1270         VA = RVLocs[++i]; // skip ahead to next loc
   1271         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
   1272         Chain = Hi.getValue(1);
   1273         InFlag = Hi.getValue(2);
   1274         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   1275         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
   1276                           DAG.getConstant(1, MVT::i32));
   1277       }
   1278     } else {
   1279       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
   1280                                InFlag);
   1281       Chain = Val.getValue(1);
   1282       InFlag = Val.getValue(2);
   1283     }
   1284 
   1285     switch (VA.getLocInfo()) {
   1286     default: llvm_unreachable("Unknown loc info!");
   1287     case CCValAssign::Full: break;
   1288     case CCValAssign::BCvt:
   1289       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
   1290       break;
   1291     }
   1292 
   1293     InVals.push_back(Val);
   1294   }
   1295 
   1296   return Chain;
   1297 }
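
         // For instance, under the soft-float AAPCS an f64 result comes back in
         // two i32 registers; the two CopyFromReg values above are reassembled
         // with ARMISD::VMOVDRR, and a v2f64 result repeats this once per half
         // before inserting each f64 into the vector.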
   1298 
   1299 /// LowerMemOpCallTo - Store the argument to the stack.
   1300 SDValue
   1301 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
   1302                                     SDValue StackPtr, SDValue Arg,
   1303                                     DebugLoc dl, SelectionDAG &DAG,
   1304                                     const CCValAssign &VA,
   1305                                     ISD::ArgFlagsTy Flags) const {
   1306   unsigned LocMemOffset = VA.getLocMemOffset();
   1307   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   1308   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   1309   return DAG.getStore(Chain, dl, Arg, PtrOff,
   1310                       MachinePointerInfo::getStack(LocMemOffset),
   1311                       false, false, 0);
   1312 }
   1313 
   1314 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
   1315                                          SDValue Chain, SDValue &Arg,
   1316                                          RegsToPassVector &RegsToPass,
   1317                                          CCValAssign &VA, CCValAssign &NextVA,
   1318                                          SDValue &StackPtr,
   1319                                          SmallVector<SDValue, 8> &MemOpChains,
   1320                                          ISD::ArgFlagsTy Flags) const {
   1321 
   1322   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
   1323                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
   1324   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
   1325 
   1326   if (NextVA.isRegLoc())
   1327     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
   1328   else {
   1329     assert(NextVA.isMemLoc());
   1330     if (StackPtr.getNode() == 0)
   1331       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
   1332 
   1333     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
   1334                                            dl, DAG, NextVA,
   1335                                            Flags));
   1336   }
   1337 }
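
         // Sketch of the split performed above: ARMISD::VMOVRRD breaks the f64
         // into two i32 halves; the low half always goes into VA's register,
         // and the high half goes either into NextVA's register or, if the
         // argument registers ran out, into NextVA's stack slot.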
   1338 
    1339 /// LowerCall - Lower a call into a callseq_start <-
    1340 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
    1341 /// nodes.
   1342 SDValue
   1343 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   1344                              SmallVectorImpl<SDValue> &InVals) const {
   1345   SelectionDAG &DAG                     = CLI.DAG;
   1346   DebugLoc &dl                          = CLI.DL;
   1347   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
   1348   SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
   1349   SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
   1350   SDValue Chain                         = CLI.Chain;
   1351   SDValue Callee                        = CLI.Callee;
   1352   bool &isTailCall                      = CLI.IsTailCall;
   1353   CallingConv::ID CallConv              = CLI.CallConv;
   1354   bool doesNotRet                       = CLI.DoesNotReturn;
   1355   bool isVarArg                         = CLI.IsVarArg;
   1356 
   1357   MachineFunction &MF = DAG.getMachineFunction();
   1358   bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
   1359   bool IsSibCall = false;
   1360   // Disable tail calls if they're not supported.
   1361   if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
   1362     isTailCall = false;
   1363   if (isTailCall) {
   1364     // Check if it's really possible to do a tail call.
   1365     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
   1366                     isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
   1367                                                    Outs, OutVals, Ins, DAG);
   1368     // We don't support GuaranteedTailCallOpt for ARM, only automatically
   1369     // detected sibcalls.
   1370     if (isTailCall) {
   1371       ++NumTailCalls;
   1372       IsSibCall = true;
   1373     }
   1374   }
   1375 
   1376   // Analyze operands of the call, assigning locations to each operand.
   1377   SmallVector<CCValAssign, 16> ArgLocs;
   1378   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1379                  getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
   1380   CCInfo.AnalyzeCallOperands(Outs,
   1381                              CCAssignFnForNode(CallConv, /* Return*/ false,
   1382                                                isVarArg));
   1383 
   1384   // Get a count of how many bytes are to be pushed on the stack.
   1385   unsigned NumBytes = CCInfo.getNextStackOffset();
   1386 
   1387   // For tail calls, memory operands are available in our caller's stack.
   1388   if (IsSibCall)
   1389     NumBytes = 0;
   1390 
   1391   // Adjust the stack pointer for the new arguments...
   1392   // These operations are automatically eliminated by the prolog/epilog pass
   1393   if (!IsSibCall)
   1394     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
   1395 
   1396   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
   1397 
   1398   RegsToPassVector RegsToPass;
   1399   SmallVector<SDValue, 8> MemOpChains;
   1400 
   1401   // Walk the register/memloc assignments, inserting copies/loads.  In the case
   1402   // of tail call optimization, arguments are handled later.
   1403   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
   1404        i != e;
   1405        ++i, ++realArgIdx) {
   1406     CCValAssign &VA = ArgLocs[i];
   1407     SDValue Arg = OutVals[realArgIdx];
   1408     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
   1409     bool isByVal = Flags.isByVal();
   1410 
   1411     // Promote the value if needed.
   1412     switch (VA.getLocInfo()) {
   1413     default: llvm_unreachable("Unknown loc info!");
   1414     case CCValAssign::Full: break;
   1415     case CCValAssign::SExt:
   1416       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
   1417       break;
   1418     case CCValAssign::ZExt:
   1419       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
   1420       break;
   1421     case CCValAssign::AExt:
   1422       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
   1423       break;
   1424     case CCValAssign::BCvt:
   1425       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   1426       break;
   1427     }
   1428 
   1429     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
   1430     if (VA.needsCustom()) {
   1431       if (VA.getLocVT() == MVT::v2f64) {
   1432         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1433                                   DAG.getConstant(0, MVT::i32));
   1434         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1435                                   DAG.getConstant(1, MVT::i32));
   1436 
   1437         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
   1438                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
   1439 
   1440         VA = ArgLocs[++i]; // skip ahead to next loc
   1441         if (VA.isRegLoc()) {
   1442           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
   1443                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
   1444         } else {
   1445           assert(VA.isMemLoc());
   1446 
   1447           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
   1448                                                  dl, DAG, VA, Flags));
   1449         }
   1450       } else {
   1451         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
   1452                          StackPtr, MemOpChains, Flags);
   1453       }
   1454     } else if (VA.isRegLoc()) {
   1455       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
   1456     } else if (isByVal) {
   1457       assert(VA.isMemLoc());
   1458       unsigned offset = 0;
   1459 
   1460       // True if this byval aggregate will be split between registers
   1461       // and memory.
   1462       if (CCInfo.isFirstByValRegValid()) {
   1463         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1464         unsigned int i, j;
   1465         for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
   1466           SDValue Const = DAG.getConstant(4*i, MVT::i32);
   1467           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
   1468           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
   1469                                      MachinePointerInfo(),
   1470                                      false, false, false, 0);
   1471           MemOpChains.push_back(Load.getValue(1));
   1472           RegsToPass.push_back(std::make_pair(j, Load));
   1473         }
   1474         offset = ARM::R4 - CCInfo.getFirstByValReg();
   1475         CCInfo.clearFirstByValReg();
   1476       }
   1477 
   1478       if (Flags.getByValSize() - 4*offset > 0) {
   1479         unsigned LocMemOffset = VA.getLocMemOffset();
   1480         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
   1481         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
   1482                                   StkPtrOff);
   1483         SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
   1484         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
   1485         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
   1486                                            MVT::i32);
   1487         SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
   1488 
   1489         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1490         SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
   1491         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
   1492                                           Ops, array_lengthof(Ops)));
   1493       }
   1494     } else if (!IsSibCall) {
   1495       assert(VA.isMemLoc());
   1496 
   1497       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
   1498                                              dl, DAG, VA, Flags));
   1499     }
   1500   }
   1501 
   1502   if (!MemOpChains.empty())
   1503     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   1504                         &MemOpChains[0], MemOpChains.size());
   1505 
   1506   // Build a sequence of copy-to-reg nodes chained together with token chain
   1507   // and flag operands which copy the outgoing args into the appropriate regs.
   1508   SDValue InFlag;
   1509   // Tail call byval lowering might overwrite argument registers so in case of
   1510   // tail call optimization the copies to registers are lowered later.
   1511   if (!isTailCall)
   1512     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   1513       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   1514                                RegsToPass[i].second, InFlag);
   1515       InFlag = Chain.getValue(1);
   1516     }
   1517 
   1518   // For tail calls lower the arguments to the 'real' stack slot.
   1519   if (isTailCall) {
   1520     // Force all the incoming stack arguments to be loaded from the stack
   1521     // before any new outgoing arguments are stored to the stack, because the
   1522     // outgoing stack slots may alias the incoming argument stack slots, and
   1523     // the alias isn't otherwise explicit. This is slightly more conservative
   1524     // than necessary, because it means that each store effectively depends
   1525     // on every argument instead of just those arguments it would clobber.
   1526 
   1527     // Do not flag preceding copytoreg stuff together with the following stuff.
   1528     InFlag = SDValue();
   1529     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   1530       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   1531                                RegsToPass[i].second, InFlag);
   1532       InFlag = Chain.getValue(1);
   1533     }
    1534     InFlag = SDValue();
   1535   }
   1536 
   1537   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   1538   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   1539   // node so that legalize doesn't hack it.
   1540   bool isDirect = false;
   1541   bool isARMFunc = false;
   1542   bool isLocalARMFunc = false;
   1543   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   1544 
   1545   if (EnableARMLongCalls) {
    1546     assert(getTargetMachine().getRelocationModel() == Reloc::Static
   1547             && "long-calls with non-static relocation model!");
   1548     // Handle a global address or an external symbol. If it's not one of
   1549     // those, the target's already in a register, so we don't need to do
   1550     // anything extra.
   1551     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   1552       const GlobalValue *GV = G->getGlobal();
   1553       // Create a constant pool entry for the callee address
   1554       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1555       ARMConstantPoolValue *CPV =
   1556         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
   1557 
   1558       // Get the address of the callee into a register
   1559       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1560       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1561       Callee = DAG.getLoad(getPointerTy(), dl,
   1562                            DAG.getEntryNode(), CPAddr,
   1563                            MachinePointerInfo::getConstantPool(),
   1564                            false, false, false, 0);
   1565     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
   1566       const char *Sym = S->getSymbol();
   1567 
   1568       // Create a constant pool entry for the callee address
   1569       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1570       ARMConstantPoolValue *CPV =
   1571         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
   1572                                       ARMPCLabelIndex, 0);
   1573       // Get the address of the callee into a register
   1574       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1575       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1576       Callee = DAG.getLoad(getPointerTy(), dl,
   1577                            DAG.getEntryNode(), CPAddr,
   1578                            MachinePointerInfo::getConstantPool(),
   1579                            false, false, false, 0);
   1580     }
   1581   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   1582     const GlobalValue *GV = G->getGlobal();
   1583     isDirect = true;
   1584     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
   1585     bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
   1586                    getTargetMachine().getRelocationModel() != Reloc::Static;
   1587     isARMFunc = !Subtarget->isThumb() || isStub;
   1588     // ARM call to a local ARM function is predicable.
   1589     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
   1590     // tBX takes a register source operand.
   1591     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
   1592       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1593       ARMConstantPoolValue *CPV =
   1594         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
   1595       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1596       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1597       Callee = DAG.getLoad(getPointerTy(), dl,
   1598                            DAG.getEntryNode(), CPAddr,
   1599                            MachinePointerInfo::getConstantPool(),
   1600                            false, false, false, 0);
   1601       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   1602       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
   1603                            getPointerTy(), Callee, PICLabel);
   1604     } else {
   1605       // On ELF targets for PIC code, direct calls should go through the PLT
   1606       unsigned OpFlags = 0;
   1607       if (Subtarget->isTargetELF() &&
   1608           getTargetMachine().getRelocationModel() == Reloc::PIC_)
   1609         OpFlags = ARMII::MO_PLT;
   1610       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
   1611     }
   1612   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
   1613     isDirect = true;
   1614     bool isStub = Subtarget->isTargetDarwin() &&
   1615                   getTargetMachine().getRelocationModel() != Reloc::Static;
   1616     isARMFunc = !Subtarget->isThumb() || isStub;
   1617     // tBX takes a register source operand.
   1618     const char *Sym = S->getSymbol();
   1619     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
   1620       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1621       ARMConstantPoolValue *CPV =
   1622         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
   1623                                       ARMPCLabelIndex, 4);
   1624       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1625       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1626       Callee = DAG.getLoad(getPointerTy(), dl,
   1627                            DAG.getEntryNode(), CPAddr,
   1628                            MachinePointerInfo::getConstantPool(),
   1629                            false, false, false, 0);
   1630       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   1631       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
   1632                            getPointerTy(), Callee, PICLabel);
   1633     } else {
   1634       unsigned OpFlags = 0;
   1635       // On ELF targets for PIC code, direct calls should go through the PLT
   1636       if (Subtarget->isTargetELF() &&
   1637                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
   1638         OpFlags = ARMII::MO_PLT;
   1639       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
   1640     }
   1641   }
   1642 
   1643   // FIXME: handle tail calls differently.
   1644   unsigned CallOpc;
   1645   bool HasMinSizeAttr = MF.getFunction()->getAttributes().
   1646     hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
   1647   if (Subtarget->isThumb()) {
   1648     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
   1649       CallOpc = ARMISD::CALL_NOLINK;
   1650     else
   1651       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
   1652   } else {
   1653     if (!isDirect && !Subtarget->hasV5TOps())
   1654       CallOpc = ARMISD::CALL_NOLINK;
   1655     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
   1656                // Emit regular call when code size is the priority
   1657                !HasMinSizeAttr)
    1658       // "mov lr, pc; b _foo" to avoid confusing the return stack predictor (RSP)
   1659       CallOpc = ARMISD::CALL_NOLINK;
   1660     else
   1661       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
   1662   }
   1663 
   1664   std::vector<SDValue> Ops;
   1665   Ops.push_back(Chain);
   1666   Ops.push_back(Callee);
   1667 
   1668   // Add argument registers to the end of the list so that they are known live
   1669   // into the call.
   1670   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
   1671     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
   1672                                   RegsToPass[i].second.getValueType()));
   1673 
   1674   // Add a register mask operand representing the call-preserved registers.
   1675   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
   1676   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
   1677   assert(Mask && "Missing call preserved mask for calling convention");
   1678   Ops.push_back(DAG.getRegisterMask(Mask));
   1679 
   1680   if (InFlag.getNode())
   1681     Ops.push_back(InFlag);
   1682 
   1683   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   1684   if (isTailCall)
   1685     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
   1686 
   1687   // Returns a chain and a flag for retval copy to use.
   1688   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
   1689   InFlag = Chain.getValue(1);
   1690 
   1691   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
   1692                              DAG.getIntPtrConstant(0, true), InFlag);
   1693   if (!Ins.empty())
   1694     InFlag = Chain.getValue(1);
   1695 
   1696   // Handle result values, copying them out of physregs into vregs that we
   1697   // return.
   1698   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
   1699                          dl, DAG, InVals);
   1700 }
   1701 
   1702 /// HandleByVal - Every parameter *after* a byval parameter is passed
   1703 /// on the stack.  Remember the next parameter register to allocate,
   1704 /// and then confiscate the rest of the parameter registers to insure
   1705 /// this.
   1706 void
   1707 ARMTargetLowering::HandleByVal(
   1708     CCState *State, unsigned &size, unsigned Align) const {
   1709   unsigned reg = State->AllocateReg(GPRArgRegs, 4);
   1710   assert((State->getCallOrPrologue() == Prologue ||
   1711           State->getCallOrPrologue() == Call) &&
   1712          "unhandled ParmContext");
   1713   if ((!State->isFirstByValRegValid()) &&
   1714       (ARM::R0 <= reg) && (reg <= ARM::R3)) {
   1715     if (Subtarget->isAAPCS_ABI() && Align > 4) {
   1716       unsigned AlignInRegs = Align / 4;
   1717       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
   1718       for (unsigned i = 0; i < Waste; ++i)
   1719         reg = State->AllocateReg(GPRArgRegs, 4);
   1720     }
   1721     if (reg != 0) {
   1722       State->setFirstByValReg(reg);
   1723       // At a call site, a byval parameter that is split between
   1724       // registers and memory needs its size truncated here.  In a
   1725       // function prologue, such byval parameters are reassembled in
   1726       // memory, and are not truncated.
   1727       if (State->getCallOrPrologue() == Call) {
   1728         unsigned excess = 4 * (ARM::R4 - reg);
   1729         assert(size >= excess && "expected larger existing stack allocation");
   1730         size -= excess;
   1731       }
   1732     }
   1733   }
   1734   // Confiscate any remaining parameter registers to preclude their
   1735   // assignment to subsequent parameters.
   1736   while (State->AllocateReg(GPRArgRegs, 4))
   1737     ;
   1738 }
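
         // Rough worked example (relying, as the code above does, on r0-r4
         // being numbered contiguously): a 16-byte byval argument whose first
         // free register is r2 claims r2 and r3, so at a call site
         // excess = 4 * (R4 - R2) = 8 and size shrinks from 16 to the 8 bytes
         // that still have to be passed on the stack.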
   1739 
   1740 /// MatchingStackOffset - Return true if the given stack call argument is
   1741 /// already available in the same position (relatively) of the caller's
   1742 /// incoming argument stack.
   1743 static
   1744 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   1745                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
   1746                          const TargetInstrInfo *TII) {
   1747   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
   1748   int FI = INT_MAX;
   1749   if (Arg.getOpcode() == ISD::CopyFromReg) {
   1750     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
   1751     if (!TargetRegisterInfo::isVirtualRegister(VR))
   1752       return false;
   1753     MachineInstr *Def = MRI->getVRegDef(VR);
   1754     if (!Def)
   1755       return false;
   1756     if (!Flags.isByVal()) {
   1757       if (!TII->isLoadFromStackSlot(Def, FI))
   1758         return false;
   1759     } else {
   1760       return false;
   1761     }
   1762   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
   1763     if (Flags.isByVal())
   1764       // ByVal argument is passed in as a pointer but it's now being
   1765       // dereferenced. e.g.
   1766       // define @foo(%struct.X* %A) {
   1767       //   tail call @bar(%struct.X* byval %A)
   1768       // }
   1769       return false;
   1770     SDValue Ptr = Ld->getBasePtr();
   1771     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
   1772     if (!FINode)
   1773       return false;
   1774     FI = FINode->getIndex();
   1775   } else
   1776     return false;
   1777 
   1778   assert(FI != INT_MAX);
   1779   if (!MFI->isFixedObjectIndex(FI))
   1780     return false;
   1781   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
   1782 }
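
         // In practice this succeeds for e.g. an outgoing i32 that was loaded
         // from a fixed (incoming-argument) stack object whose recorded offset
         // and 4-byte size match the callee's expected location, meaning the
         // argument is already where the tail call needs it.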
   1783 
   1784 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
   1785 /// for tail call optimization. Targets which want to do tail call
   1786 /// optimization should implement this function.
   1787 bool
   1788 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   1789                                                      CallingConv::ID CalleeCC,
   1790                                                      bool isVarArg,
   1791                                                      bool isCalleeStructRet,
   1792                                                      bool isCallerStructRet,
   1793                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
   1794                                     const SmallVectorImpl<SDValue> &OutVals,
   1795                                     const SmallVectorImpl<ISD::InputArg> &Ins,
   1796                                                      SelectionDAG& DAG) const {
   1797   const Function *CallerF = DAG.getMachineFunction().getFunction();
   1798   CallingConv::ID CallerCC = CallerF->getCallingConv();
   1799   bool CCMatch = CallerCC == CalleeCC;
   1800 
   1801   // Look for obvious safe cases to perform tail call optimization that do not
   1802   // require ABI changes. This is what gcc calls sibcall.
   1803 
   1804   // Do not sibcall optimize vararg calls unless the call site is not passing
   1805   // any arguments.
   1806   if (isVarArg && !Outs.empty())
   1807     return false;
   1808 
   1809   // Also avoid sibcall optimization if either caller or callee uses struct
   1810   // return semantics.
   1811   if (isCalleeStructRet || isCallerStructRet)
   1812     return false;
   1813 
   1814   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
   1815   // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
   1816   // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
   1817   // support in the assembler and linker to be used. This would need to be
   1818   // fixed to fully support tail calls in Thumb1.
   1819   //
   1820   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
    1821 // LR.  This means if we need to reload LR, it takes extra instructions,
   1822   // which outweighs the value of the tail call; but here we don't know yet
   1823   // whether LR is going to be used.  Probably the right approach is to
   1824   // generate the tail call here and turn it back into CALL/RET in
   1825   // emitEpilogue if LR is used.
   1826 
   1827   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
   1828   // but we need to make sure there are enough registers; the only valid
   1829   // registers are the 4 used for parameters.  We don't currently do this
   1830   // case.
   1831   if (Subtarget->isThumb1Only())
   1832     return false;
   1833 
   1834   // If the calling conventions do not match, then we'd better make sure the
   1835   // results are returned in the same way as what the caller expects.
   1836   if (!CCMatch) {
   1837     SmallVector<CCValAssign, 16> RVLocs1;
   1838     ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
   1839                        getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
   1840     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
   1841 
   1842     SmallVector<CCValAssign, 16> RVLocs2;
   1843     ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
   1844                        getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
   1845     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
   1846 
   1847     if (RVLocs1.size() != RVLocs2.size())
   1848       return false;
   1849     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
   1850       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
   1851         return false;
   1852       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
   1853         return false;
   1854       if (RVLocs1[i].isRegLoc()) {
   1855         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
   1856           return false;
   1857       } else {
   1858         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
   1859           return false;
   1860       }
   1861     }
   1862   }
   1863 
    1864   // If the caller's vararg or byval argument has been split between registers
    1865   // and stack, do not perform a tail call, since part of the argument is in the
    1866   // caller's local frame.
   1867   const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
   1868                                       getInfo<ARMFunctionInfo>();
   1869   if (AFI_Caller->getVarArgsRegSaveSize())
   1870     return false;
   1871 
   1872   // If the callee takes no arguments then go on to check the results of the
   1873   // call.
   1874   if (!Outs.empty()) {
   1875     // Check if stack adjustment is needed. For now, do not do this if any
   1876     // argument is passed on the stack.
   1877     SmallVector<CCValAssign, 16> ArgLocs;
   1878     ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
   1879                       getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
   1880     CCInfo.AnalyzeCallOperands(Outs,
   1881                                CCAssignFnForNode(CalleeCC, false, isVarArg));
   1882     if (CCInfo.getNextStackOffset()) {
   1883       MachineFunction &MF = DAG.getMachineFunction();
   1884 
   1885       // Check if the arguments are already laid out in the right way as
   1886       // the caller's fixed stack objects.
   1887       MachineFrameInfo *MFI = MF.getFrameInfo();
   1888       const MachineRegisterInfo *MRI = &MF.getRegInfo();
   1889       const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   1890       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
   1891            i != e;
   1892            ++i, ++realArgIdx) {
   1893         CCValAssign &VA = ArgLocs[i];
   1894         EVT RegVT = VA.getLocVT();
   1895         SDValue Arg = OutVals[realArgIdx];
   1896         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
   1897         if (VA.getLocInfo() == CCValAssign::Indirect)
   1898           return false;
   1899         if (VA.needsCustom()) {
   1900           // f64 and vector types are split into multiple registers or
   1901           // register/stack-slot combinations.  The types will not match
   1902           // the registers; give up on memory f64 refs until we figure
   1903           // out what to do about this.
   1904           if (!VA.isRegLoc())
   1905             return false;
   1906           if (!ArgLocs[++i].isRegLoc())
   1907             return false;
   1908           if (RegVT == MVT::v2f64) {
   1909             if (!ArgLocs[++i].isRegLoc())
   1910               return false;
   1911             if (!ArgLocs[++i].isRegLoc())
   1912               return false;
   1913           }
   1914         } else if (!VA.isRegLoc()) {
   1915           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
   1916                                    MFI, MRI, TII))
   1917             return false;
   1918         }
   1919       }
   1920     }
   1921   }
   1922 
   1923   return true;
   1924 }
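
         // Example of a call that passes these checks: a non-vararg, non-sret
         // call where caller and callee share the C calling convention and all
         // arguments fit in r0-r3, so getNextStackOffset() is zero and no stack
         // adjustment is needed.  A Thumb1 caller or a vararg/byval argument
         // split across registers and the caller's frame is rejected earlier in
         // this function.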
   1925 
   1926 bool
   1927 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
   1928                                   MachineFunction &MF, bool isVarArg,
   1929                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
   1930                                   LLVMContext &Context) const {
   1931   SmallVector<CCValAssign, 16> RVLocs;
   1932   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
   1933   return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
   1934                                                     isVarArg));
   1935 }
   1936 
   1937 SDValue
   1938 ARMTargetLowering::LowerReturn(SDValue Chain,
   1939                                CallingConv::ID CallConv, bool isVarArg,
   1940                                const SmallVectorImpl<ISD::OutputArg> &Outs,
   1941                                const SmallVectorImpl<SDValue> &OutVals,
   1942                                DebugLoc dl, SelectionDAG &DAG) const {
   1943 
    1944   // CCValAssign - represents the assignment of the return value to a location.
   1945   SmallVector<CCValAssign, 16> RVLocs;
   1946 
   1947   // CCState - Info about the registers and stack slots.
   1948   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1949                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
   1950 
   1951   // Analyze outgoing return values.
   1952   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
   1953                                                isVarArg));
   1954 
   1955   SDValue Flag;
   1956   SmallVector<SDValue, 4> RetOps;
   1957   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
   1958 
   1959   // Copy the result values into the output registers.
   1960   for (unsigned i = 0, realRVLocIdx = 0;
   1961        i != RVLocs.size();
   1962        ++i, ++realRVLocIdx) {
   1963     CCValAssign &VA = RVLocs[i];
   1964     assert(VA.isRegLoc() && "Can only return in registers!");
   1965 
   1966     SDValue Arg = OutVals[realRVLocIdx];
   1967 
   1968     switch (VA.getLocInfo()) {
   1969     default: llvm_unreachable("Unknown loc info!");
   1970     case CCValAssign::Full: break;
   1971     case CCValAssign::BCvt:
   1972       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   1973       break;
   1974     }
   1975 
   1976     if (VA.needsCustom()) {
   1977       if (VA.getLocVT() == MVT::v2f64) {
   1978         // Extract the first half and return it in two registers.
   1979         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1980                                    DAG.getConstant(0, MVT::i32));
   1981         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
   1982                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
   1983 
   1984         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
   1985         Flag = Chain.getValue(1);
   1986         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   1987         VA = RVLocs[++i]; // skip ahead to next loc
   1988         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
   1989                                  HalfGPRs.getValue(1), Flag);
   1990         Flag = Chain.getValue(1);
   1991         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   1992         VA = RVLocs[++i]; // skip ahead to next loc
   1993 
   1994         // Extract the 2nd half and fall through to handle it as an f64 value.
   1995         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1996                           DAG.getConstant(1, MVT::i32));
   1997       }
   1998       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
   1999       // available.
   2000       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
   2001                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
   2002       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
   2003       Flag = Chain.getValue(1);
   2004       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2005       VA = RVLocs[++i]; // skip ahead to next loc
   2006       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
   2007                                Flag);
   2008     } else
   2009       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
   2010 
    2011     // Guarantee that all emitted copies are glued
    2012     // together so they cannot be scheduled apart.
   2013     Flag = Chain.getValue(1);
   2014     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2015   }
   2016 
   2017   // Update chain and glue.
   2018   RetOps[0] = Chain;
   2019   if (Flag.getNode())
   2020     RetOps.push_back(Flag);
   2021 
   2022   return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
   2023                      RetOps.data(), RetOps.size());
   2024 }
   2025 
   2026 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
   2027   if (N->getNumValues() != 1)
   2028     return false;
   2029   if (!N->hasNUsesOfValue(1, 0))
   2030     return false;
   2031 
   2032   SDValue TCChain = Chain;
   2033   SDNode *Copy = *N->use_begin();
   2034   if (Copy->getOpcode() == ISD::CopyToReg) {
   2035     // If the copy has a glue operand, we conservatively assume it isn't safe to
   2036     // perform a tail call.
   2037     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
   2038       return false;
   2039     TCChain = Copy->getOperand(0);
   2040   } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
   2041     SDNode *VMov = Copy;
   2042     // f64 returned in a pair of GPRs.
   2043     SmallPtrSet<SDNode*, 2> Copies;
   2044     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
   2045          UI != UE; ++UI) {
   2046       if (UI->getOpcode() != ISD::CopyToReg)
   2047         return false;
   2048       Copies.insert(*UI);
   2049     }
   2050     if (Copies.size() > 2)
   2051       return false;
   2052 
   2053     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
   2054          UI != UE; ++UI) {
   2055       SDValue UseChain = UI->getOperand(0);
   2056       if (Copies.count(UseChain.getNode()))
   2057         // Second CopyToReg
   2058         Copy = *UI;
   2059       else
   2060         // First CopyToReg
   2061         TCChain = UseChain;
   2062     }
   2063   } else if (Copy->getOpcode() == ISD::BITCAST) {
   2064     // f32 returned in a single GPR.
   2065     if (!Copy->hasOneUse())
   2066       return false;
   2067     Copy = *Copy->use_begin();
   2068     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
   2069       return false;
   2070     Chain = Copy->getOperand(0);
   2071   } else {
   2072     return false;
   2073   }
   2074 
   2075   bool HasRet = false;
   2076   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
   2077        UI != UE; ++UI) {
   2078     if (UI->getOpcode() != ARMISD::RET_FLAG)
   2079       return false;
   2080     HasRet = true;
   2081   }
   2082 
   2083   if (!HasRet)
   2084     return false;
   2085 
   2086   Chain = TCChain;
   2087   return true;
   2088 }
   2089 
   2090 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
   2091   if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
   2092     return false;
   2093 
   2094   if (!CI->isTailCall())
   2095     return false;
   2096 
   2097   return !Subtarget->isThumb1Only();
   2098 }
   2099 
   2100 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
    2101 // their target counterparts wrapped in the ARMISD::Wrapper node. Suppose N is
    2102 // one of the above-mentioned nodes. It has to be wrapped because otherwise
    2103 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
    2104 // be used to form an addressing mode. These wrapped nodes will be selected
   2105 // into MOVi.
   2106 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
   2107   EVT PtrVT = Op.getValueType();
   2108   // FIXME there is no actual debug info here
   2109   DebugLoc dl = Op.getDebugLoc();
   2110   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
   2111   SDValue Res;
   2112   if (CP->isMachineConstantPoolEntry())
   2113     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
   2114                                     CP->getAlignment());
   2115   else
   2116     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
   2117                                     CP->getAlignment());
   2118   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
   2119 }
   2120 
   2121 unsigned ARMTargetLowering::getJumpTableEncoding() const {
   2122   return MachineJumpTableInfo::EK_Inline;
   2123 }
   2124 
   2125 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   2126                                              SelectionDAG &DAG) const {
   2127   MachineFunction &MF = DAG.getMachineFunction();
   2128   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2129   unsigned ARMPCLabelIndex = 0;
   2130   DebugLoc DL = Op.getDebugLoc();
   2131   EVT PtrVT = getPointerTy();
   2132   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   2133   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2134   SDValue CPAddr;
   2135   if (RelocM == Reloc::Static) {
   2136     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
   2137   } else {
   2138     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
   2139     ARMPCLabelIndex = AFI->createPICLabelUId();
   2140     ARMConstantPoolValue *CPV =
   2141       ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
   2142                                       ARMCP::CPBlockAddress, PCAdj);
   2143     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2144   }
   2145   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
   2146   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
   2147                                MachinePointerInfo::getConstantPool(),
   2148                                false, false, false, 0);
   2149   if (RelocM == Reloc::Static)
   2150     return Result;
   2151   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2152   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
   2153 }
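
         // Sketch of the PIC case above: the constant pool entry holds the
         // block address minus (label + 8) (4 in Thumb mode), the load
         // materializes that delta, and ARMISD::PIC_ADD adds the pc at the
         // PICLabel so the final value is the absolute block address.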
   2154 
   2155 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
   2156 SDValue
   2157 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   2158                                                  SelectionDAG &DAG) const {
   2159   DebugLoc dl = GA->getDebugLoc();
   2160   EVT PtrVT = getPointerTy();
   2161   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   2162   MachineFunction &MF = DAG.getMachineFunction();
   2163   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2164   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2165   ARMConstantPoolValue *CPV =
   2166     ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
   2167                                     ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
   2168   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2169   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
   2170   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
   2171                          MachinePointerInfo::getConstantPool(),
   2172                          false, false, false, 0);
   2173   SDValue Chain = Argument.getValue(1);
   2174 
   2175   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2176   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
   2177 
   2178   // call __tls_get_addr.
   2179   ArgListTy Args;
   2180   ArgListEntry Entry;
   2181   Entry.Node = Argument;
   2182   Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
   2183   Args.push_back(Entry);
   2184   // FIXME: is there useful debug info available here?
   2185   TargetLowering::CallLoweringInfo CLI(Chain,
   2186                 (Type *) Type::getInt32Ty(*DAG.getContext()),
   2187                 false, false, false, false,
   2188                 0, CallingConv::C, /*isTailCall=*/false,
   2189                 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
   2190                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
   2191   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
   2192   return CallResult.first;
   2193 }
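
         // The general-dynamic sequence above therefore amounts to:
         // materialize the pc-relative address of the TLSGD descriptor via the
         // constant pool plus ARMISD::PIC_ADD, then emit an ordinary C call to
         // __tls_get_addr with that address as the single i32 argument.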
   2194 
   2195 // Lower ISD::GlobalTLSAddress using the "initial exec" or
   2196 // "local exec" model.
   2197 SDValue
   2198 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
   2199                                         SelectionDAG &DAG,
   2200                                         TLSModel::Model model) const {
   2201   const GlobalValue *GV = GA->getGlobal();
   2202   DebugLoc dl = GA->getDebugLoc();
   2203   SDValue Offset;
   2204   SDValue Chain = DAG.getEntryNode();
   2205   EVT PtrVT = getPointerTy();
   2206   // Get the Thread Pointer
   2207   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
   2208 
   2209   if (model == TLSModel::InitialExec) {
   2210     MachineFunction &MF = DAG.getMachineFunction();
   2211     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2212     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2213     // Initial exec model.
   2214     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   2215     ARMConstantPoolValue *CPV =
   2216       ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
   2217                                       ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
   2218                                       true);
   2219     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2220     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
   2221     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2222                          MachinePointerInfo::getConstantPool(),
   2223                          false, false, false, 0);
   2224     Chain = Offset.getValue(1);
   2225 
   2226     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2227     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
   2228 
   2229     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2230                          MachinePointerInfo::getConstantPool(),
   2231                          false, false, false, 0);
   2232   } else {
   2233     // local exec model
   2234     assert(model == TLSModel::LocalExec);
   2235     ARMConstantPoolValue *CPV =
   2236       ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
   2237     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2238     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
   2239     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2240                          MachinePointerInfo::getConstantPool(),
   2241                          false, false, false, 0);
   2242   }
   2243 
    2244   // The address of the thread-local variable is the sum of the thread
    2245   // pointer and the variable's offset.
   2246   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
   2247 }
   2248 
   2249 SDValue
   2250 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
   2251   // TODO: implement the "local dynamic" model
   2252   assert(Subtarget->isTargetELF() &&
   2253          "TLS not implemented for non-ELF targets");
   2254   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   2255 
   2256   TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
   2257 
   2258   switch (model) {
   2259     case TLSModel::GeneralDynamic:
   2260     case TLSModel::LocalDynamic:
   2261       return LowerToTLSGeneralDynamicModel(GA, DAG);
   2262     case TLSModel::InitialExec:
   2263     case TLSModel::LocalExec:
   2264       return LowerToTLSExecModels(GA, DAG, model);
   2265   }
   2266   llvm_unreachable("bogus TLS model");
   2267 }
   2268 
   2269 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   2270                                                  SelectionDAG &DAG) const {
   2271   EVT PtrVT = getPointerTy();
   2272   DebugLoc dl = Op.getDebugLoc();
   2273   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   2274   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
   2275     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
   2276     ARMConstantPoolValue *CPV =
   2277       ARMConstantPoolConstant::Create(GV,
   2278                                       UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
   2279     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2280     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2281     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
   2282                                  CPAddr,
   2283                                  MachinePointerInfo::getConstantPool(),
   2284                                  false, false, false, 0);
   2285     SDValue Chain = Result.getValue(1);
   2286     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
   2287     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
   2288     if (!UseGOTOFF)
   2289       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
   2290                            MachinePointerInfo::getGOT(),
   2291                            false, false, false, 0);
   2292     return Result;
   2293   }
   2294 
    2295   // If we have T2 ops, we can materialize the address directly with a
    2296   // movw/movt pair. This is always cheaper.
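           // Minimal sketch of the materialization this enables (symbol name is
           // illustrative):
           //   movw r0, :lower16:sym
           //   movt r0, :upper16:sym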
   2297   if (Subtarget->useMovt()) {
   2298     ++NumMovwMovt;
   2299     // FIXME: Once remat is capable of dealing with instructions with register
   2300     // operands, expand this into two nodes.
   2301     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
   2302                        DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   2303   } else {
   2304     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
   2305     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2306     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2307                        MachinePointerInfo::getConstantPool(),
   2308                        false, false, false, 0);
   2309   }
   2310 }
   2311 
   2312 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   2313                                                     SelectionDAG &DAG) const {
   2314   EVT PtrVT = getPointerTy();
   2315   DebugLoc dl = Op.getDebugLoc();
   2316   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   2317   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2318 
   2319   // FIXME: Enable this for static codegen when tool issues are fixed.  Also
   2320   // update ARMFastISel::ARMMaterializeGV.
   2321   if (Subtarget->useMovt() && RelocM != Reloc::Static) {
   2322     ++NumMovwMovt;
   2323     // FIXME: Once remat is capable of dealing with instructions with register
   2324     // operands, expand this into two nodes.
   2325     if (RelocM == Reloc::Static)
   2326       return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
   2327                                  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   2328 
   2329     unsigned Wrapper = (RelocM == Reloc::PIC_)
   2330       ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
   2331     SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
   2332                                  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   2333     if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
   2334       Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
   2335                            MachinePointerInfo::getGOT(),
   2336                            false, false, false, 0);
   2337     return Result;
   2338   }
   2339 
   2340   unsigned ARMPCLabelIndex = 0;
   2341   SDValue CPAddr;
   2342   if (RelocM == Reloc::Static) {
   2343     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
   2344   } else {
   2345     ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
   2346     ARMPCLabelIndex = AFI->createPICLabelUId();
   2347     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
   2348     ARMConstantPoolValue *CPV =
   2349       ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
   2350                                       PCAdj);
   2351     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2352   }
   2353   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2354 
   2355   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2356                                MachinePointerInfo::getConstantPool(),
   2357                                false, false, false, 0);
   2358   SDValue Chain = Result.getValue(1);
   2359 
   2360   if (RelocM == Reloc::PIC_) {
   2361     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2362     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2363   }
   2364 
   2365   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
   2366     Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
   2367                          false, false, false, 0);
   2368 
   2369   return Result;
   2370 }
   2371 
   2372 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   2373                                                     SelectionDAG &DAG) const {
   2374   assert(Subtarget->isTargetELF() &&
   2375          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
   2376   MachineFunction &MF = DAG.getMachineFunction();
   2377   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2378   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2379   EVT PtrVT = getPointerTy();
   2380   DebugLoc dl = Op.getDebugLoc();
   2381   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
   2382   ARMConstantPoolValue *CPV =
   2383     ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
   2384                                   ARMPCLabelIndex, PCAdj);
   2385   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2386   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2387   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2388                                MachinePointerInfo::getConstantPool(),
   2389                                false, false, false, 0);
   2390   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2391   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2392 }
   2393 
   2394 SDValue
   2395 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
   2396   DebugLoc dl = Op.getDebugLoc();
   2397   SDValue Val = DAG.getConstant(0, MVT::i32);
   2398   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
   2399                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
   2400                      Op.getOperand(1), Val);
   2401 }
   2402 
   2403 SDValue
   2404 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
   2405   DebugLoc dl = Op.getDebugLoc();
   2406   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
   2407                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
   2408 }
   2409 
   2410 SDValue
   2411 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   2412                                           const ARMSubtarget *Subtarget) const {
   2413   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   2414   DebugLoc dl = Op.getDebugLoc();
   2415   switch (IntNo) {
   2416   default: return SDValue();    // Don't custom lower most intrinsics.
   2417   case Intrinsic::arm_thread_pointer: {
   2418     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2419     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
   2420   }
   2421   case Intrinsic::eh_sjlj_lsda: {
   2422     MachineFunction &MF = DAG.getMachineFunction();
   2423     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2424     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2425     EVT PtrVT = getPointerTy();
   2426     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2427     SDValue CPAddr;
   2428     unsigned PCAdj = (RelocM != Reloc::PIC_)
   2429       ? 0 : (Subtarget->isThumb() ? 4 : 8);
   2430     ARMConstantPoolValue *CPV =
   2431       ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
   2432                                       ARMCP::CPLSDA, PCAdj);
   2433     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2434     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2435     SDValue Result =
   2436       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2437                   MachinePointerInfo::getConstantPool(),
   2438                   false, false, false, 0);
   2439 
   2440     if (RelocM == Reloc::PIC_) {
   2441       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2442       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2443     }
   2444     return Result;
   2445   }
   2446   case Intrinsic::arm_neon_vmulls:
   2447   case Intrinsic::arm_neon_vmullu: {
   2448     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
   2449       ? ARMISD::VMULLs : ARMISD::VMULLu;
   2450     return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
   2451                        Op.getOperand(1), Op.getOperand(2));
   2452   }
   2453   }
   2454 }
   2455 
   2456 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
   2457                                const ARMSubtarget *Subtarget) {
   2458   DebugLoc dl = Op.getDebugLoc();
   2459   if (!Subtarget->hasDataBarrier()) {
   2460     // Some ARMv6 cpus can support data barriers with an mcr instruction.
   2461     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
   2462     // here.
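             // The MCR form is the ARMv6 CP15 barrier; as a sketch it ends up as
             // something like "mcr p15, 0, rX, c7, c10, 5" (a data memory barrier).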
   2463     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
   2464            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
   2465     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
   2466                        DAG.getConstant(0, MVT::i32));
   2467   }
   2468 
   2469   SDValue Op5 = Op.getOperand(5);
   2470   bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
   2471   unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   2472   unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
   2473   bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
   2474 
   2475   ARM_MB::MemBOpt DMBOpt;
   2476   if (isDeviceBarrier)
   2477     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
   2478   else
   2479     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
   2480   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
   2481                      DAG.getConstant(DMBOpt, MVT::i32));
   2482 }
   2483 
   2484 
   2485 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   2486                                  const ARMSubtarget *Subtarget) {
   2487   // FIXME: handle "fence singlethread" more efficiently.
   2488   DebugLoc dl = Op.getDebugLoc();
   2489   if (!Subtarget->hasDataBarrier()) {
   2490     // Some ARMv6 cpus can support data barriers with an mcr instruction.
   2491     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
   2492     // here.
   2493     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
    2494            "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
   2495     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
   2496                        DAG.getConstant(0, MVT::i32));
   2497   }
   2498 
   2499   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
   2500                      DAG.getConstant(ARM_MB::ISH, MVT::i32));
   2501 }
   2502 
   2503 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
   2504                              const ARMSubtarget *Subtarget) {
    2505   // Pre-v5TE ARM and Thumb1 do not have preload instructions.
   2506   if (!(Subtarget->isThumb2() ||
   2507         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
   2508     // Just preserve the chain.
   2509     return Op.getOperand(0);
   2510 
   2511   DebugLoc dl = Op.getDebugLoc();
   2512   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
   2513   if (!isRead &&
   2514       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
   2515     // ARMv7 with MP extension has PLDW.
   2516     return Op.getOperand(0);
   2517 
   2518   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
   2519   if (Subtarget->isThumb()) {
   2520     // Invert the bits.
   2521     isRead = ~isRead & 1;
   2522     isData = ~isData & 1;
   2523   }
   2524 
   2525   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
   2526                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
   2527                      DAG.getConstant(isData, MVT::i32));
   2528 }
   2529 
   2530 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
   2531   MachineFunction &MF = DAG.getMachineFunction();
   2532   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
   2533 
   2534   // vastart just stores the address of the VarArgsFrameIndex slot into the
   2535   // memory location argument.
   2536   DebugLoc dl = Op.getDebugLoc();
   2537   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2538   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   2539   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   2540   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
   2541                       MachinePointerInfo(SV), false, false, 0);
   2542 }
   2543 
   2544 SDValue
   2545 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
   2546                                         SDValue &Root, SelectionDAG &DAG,
   2547                                         DebugLoc dl) const {
   2548   MachineFunction &MF = DAG.getMachineFunction();
   2549   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2550 
   2551   const TargetRegisterClass *RC;
   2552   if (AFI->isThumb1OnlyFunction())
   2553     RC = &ARM::tGPRRegClass;
   2554   else
   2555     RC = &ARM::GPRRegClass;
   2556 
   2557   // Transform the arguments stored in physical registers into virtual ones.
   2558   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2559   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2560 
   2561   SDValue ArgValue2;
   2562   if (NextVA.isMemLoc()) {
   2563     MachineFrameInfo *MFI = MF.getFrameInfo();
   2564     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
   2565 
   2566     // Create load node to retrieve arguments from the stack.
   2567     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2568     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
   2569                             MachinePointerInfo::getFixedStack(FI),
   2570                             false, false, false, 0);
   2571   } else {
   2572     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
   2573     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2574   }
   2575 
   2576   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
   2577 }
   2578 
   2579 void
   2580 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
   2581                                   unsigned &VARegSize, unsigned &VARegSaveSize)
   2582   const {
   2583   unsigned NumGPRs;
   2584   if (CCInfo.isFirstByValRegValid())
   2585     NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
   2586   else {
   2587     unsigned int firstUnalloced;
   2588     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
   2589                                                 sizeof(GPRArgRegs) /
   2590                                                 sizeof(GPRArgRegs[0]));
   2591     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
   2592   }
   2593 
   2594   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
   2595   VARegSize = NumGPRs * 4;
   2596   VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
   2597 }
   2598 
   2599 // The remaining GPRs hold either the beginning of variable-argument
   2600 // data, or the beginning of an aggregate passed by value (usually
   2601 // byval).  Either way, we allocate stack slots adjacent to the data
   2602 // provided by our caller, and store the unallocated registers there.
   2603 // If this is a variadic function, the va_list pointer will begin with
   2604 // these values; otherwise, this reassembles a (byval) structure that
   2605 // was split between registers and memory.
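         // For example (purely illustrative): a 16-byte struct passed byval after one
         // integer argument lands in r1-r3 plus 4 bytes of caller stack; the r1-r3
         // portion is spilled into stack slots adjacent to that caller memory, so the
         // callee sees one contiguous object.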
   2606 void
   2607 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   2608                                         DebugLoc dl, SDValue &Chain,
   2609                                         const Value *OrigArg,
   2610                                         unsigned OffsetFromOrigArg,
   2611                                         unsigned ArgOffset,
   2612                                         bool ForceMutable) const {
   2613   MachineFunction &MF = DAG.getMachineFunction();
   2614   MachineFrameInfo *MFI = MF.getFrameInfo();
   2615   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2616   unsigned firstRegToSaveIndex;
   2617   if (CCInfo.isFirstByValRegValid())
   2618     firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
   2619   else {
   2620     firstRegToSaveIndex = CCInfo.getFirstUnallocated
   2621       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
   2622   }
   2623 
   2624   unsigned VARegSize, VARegSaveSize;
   2625   computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
   2626   if (VARegSaveSize) {
   2627     // If this function is vararg, store any remaining integer argument regs
    2628     // to their spots on the stack so that they may be loaded by dereferencing
   2629     // the result of va_next.
   2630     AFI->setVarArgsRegSaveSize(VARegSaveSize);
   2631     AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
   2632                                                      ArgOffset + VARegSaveSize
   2633                                                      - VARegSize,
   2634                                                      false));
   2635     SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
   2636                                     getPointerTy());
   2637 
   2638     SmallVector<SDValue, 4> MemOps;
   2639     for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
   2640       const TargetRegisterClass *RC;
   2641       if (AFI->isThumb1OnlyFunction())
   2642         RC = &ARM::tGPRRegClass;
   2643       else
   2644         RC = &ARM::GPRRegClass;
   2645 
   2646       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
   2647       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
   2648       SDValue Store =
   2649         DAG.getStore(Val.getValue(1), dl, Val, FIN,
   2650                      MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
   2651                      false, false, 0);
   2652       MemOps.push_back(Store);
   2653       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
   2654                         DAG.getConstant(4, getPointerTy()));
   2655     }
   2656     if (!MemOps.empty())
   2657       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   2658                           &MemOps[0], MemOps.size());
   2659   } else
   2660     // This will point to the next argument passed via stack.
   2661     AFI->setVarArgsFrameIndex(
   2662         MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
   2663 }
   2664 
   2665 SDValue
   2666 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   2667                                         CallingConv::ID CallConv, bool isVarArg,
   2668                                         const SmallVectorImpl<ISD::InputArg>
   2669                                           &Ins,
   2670                                         DebugLoc dl, SelectionDAG &DAG,
   2671                                         SmallVectorImpl<SDValue> &InVals)
   2672                                           const {
   2673   MachineFunction &MF = DAG.getMachineFunction();
   2674   MachineFrameInfo *MFI = MF.getFrameInfo();
   2675 
   2676   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2677 
   2678   // Assign locations to all of the incoming arguments.
   2679   SmallVector<CCValAssign, 16> ArgLocs;
   2680   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2681                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
   2682   CCInfo.AnalyzeFormalArguments(Ins,
   2683                                 CCAssignFnForNode(CallConv, /* Return*/ false,
   2684                                                   isVarArg));
   2685 
   2686   SmallVector<SDValue, 16> ArgValues;
   2687   int lastInsIndex = -1;
   2688   SDValue ArgValue;
   2689   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
   2690   unsigned CurArgIdx = 0;
   2691   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   2692     CCValAssign &VA = ArgLocs[i];
   2693     std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
   2694     CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
   2695     // Arguments stored in registers.
   2696     if (VA.isRegLoc()) {
   2697       EVT RegVT = VA.getLocVT();
   2698 
   2699       if (VA.needsCustom()) {
   2700         // f64 and vector types are split up into multiple registers or
   2701         // combinations of registers and stack slots.
   2702         if (VA.getLocVT() == MVT::v2f64) {
   2703           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
   2704                                                    Chain, DAG, dl);
   2705           VA = ArgLocs[++i]; // skip ahead to next loc
   2706           SDValue ArgValue2;
   2707           if (VA.isMemLoc()) {
   2708             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
   2709             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2710             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
   2711                                     MachinePointerInfo::getFixedStack(FI),
   2712                                     false, false, false, 0);
   2713           } else {
   2714             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
   2715                                              Chain, DAG, dl);
   2716           }
   2717           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
   2718           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2719                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
   2720           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2721                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
   2722         } else
   2723           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
   2724 
   2725       } else {
   2726         const TargetRegisterClass *RC;
   2727 
   2728         if (RegVT == MVT::f32)
   2729           RC = &ARM::SPRRegClass;
   2730         else if (RegVT == MVT::f64)
   2731           RC = &ARM::DPRRegClass;
   2732         else if (RegVT == MVT::v2f64)
   2733           RC = &ARM::QPRRegClass;
   2734         else if (RegVT == MVT::i32)
   2735           RC = AFI->isThumb1OnlyFunction() ?
   2736             (const TargetRegisterClass*)&ARM::tGPRRegClass :
   2737             (const TargetRegisterClass*)&ARM::GPRRegClass;
   2738         else
   2739           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
   2740 
   2741         // Transform the arguments in physical registers into virtual ones.
   2742         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2743         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
   2744       }
   2745 
   2746       // If this is an 8 or 16-bit value, it is really passed promoted
   2747       // to 32 bits.  Insert an assert[sz]ext to capture this, then
   2748       // truncate to the right size.
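               // For example (illustrative): an i8 signext argument arrives in a full
               // i32 register, so we wrap it in AssertSext and truncate back to i8.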
   2749       switch (VA.getLocInfo()) {
   2750       default: llvm_unreachable("Unknown loc info!");
   2751       case CCValAssign::Full: break;
   2752       case CCValAssign::BCvt:
   2753         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
   2754         break;
   2755       case CCValAssign::SExt:
   2756         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
   2757                                DAG.getValueType(VA.getValVT()));
   2758         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2759         break;
   2760       case CCValAssign::ZExt:
   2761         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
   2762                                DAG.getValueType(VA.getValVT()));
   2763         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2764         break;
   2765       }
   2766 
   2767       InVals.push_back(ArgValue);
   2768 
   2769     } else { // VA.isRegLoc()
   2770 
   2771       // sanity check
   2772       assert(VA.isMemLoc());
   2773       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
   2774 
   2775       int index = ArgLocs[i].getValNo();
   2776 
   2777       // Some Ins[] entries become multiple ArgLoc[] entries.
   2778       // Process them only once.
   2779       if (index != lastInsIndex)
   2780         {
   2781           ISD::ArgFlagsTy Flags = Ins[index].Flags;
   2782           // FIXME: For now, all byval parameter objects are marked mutable.
   2783           // This can be changed with more analysis.
    2784           // In the case of tail call optimization, mark all arguments mutable,
    2785           // since they could be overwritten when the arguments of a tail call
    2786           // are lowered.
   2787           if (Flags.isByVal()) {
   2788             ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2789             if (!AFI->getVarArgsFrameIndex()) {
   2790               VarArgStyleRegisters(CCInfo, DAG,
   2791                                    dl, Chain, CurOrigArg,
   2792                                    Ins[VA.getValNo()].PartOffset,
   2793                                    VA.getLocMemOffset(),
   2794                                    true /*force mutable frames*/);
   2795               int VAFrameIndex = AFI->getVarArgsFrameIndex();
   2796               InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
   2797             } else {
   2798               int FI = MFI->CreateFixedObject(Flags.getByValSize(),
   2799                                               VA.getLocMemOffset(), false);
   2800               InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
   2801             }
   2802           } else {
   2803             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
   2804                                             VA.getLocMemOffset(), true);
   2805 
   2806             // Create load nodes to retrieve arguments from the stack.
   2807             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2808             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
   2809                                          MachinePointerInfo::getFixedStack(FI),
   2810                                          false, false, false, 0));
   2811           }
   2812           lastInsIndex = index;
   2813         }
   2814     }
   2815   }
   2816 
   2817   // varargs
   2818   if (isVarArg)
   2819     VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
   2820                          CCInfo.getNextStackOffset());
   2821 
   2822   return Chain;
   2823 }
   2824 
   2825 /// isFloatingPointZero - Return true if this is +0.0.
   2826 static bool isFloatingPointZero(SDValue Op) {
   2827   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
   2828     return CFP->getValueAPF().isPosZero();
   2829   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
   2830     // Maybe this has already been legalized into the constant pool?
   2831     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
   2832       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
   2833       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
   2834         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
   2835           return CFP->getValueAPF().isPosZero();
   2836     }
   2837   }
   2838   return false;
   2839 }
   2840 
    2841 /// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
   2842 /// the given operands.
   2843 SDValue
   2844 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   2845                              SDValue &ARMcc, SelectionDAG &DAG,
   2846                              DebugLoc dl) const {
   2847   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
   2848     unsigned C = RHSC->getZExtValue();
   2849     if (!isLegalICmpImmediate(C)) {
   2850       // Constant does not fit, try adjusting it by one?
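               // For instance (illustrative): "x < 0x10001" has no ARM modified-immediate
               // encoding, but "x <= 0x10000" does, so SETLT becomes SETLE with C-1.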
   2851       switch (CC) {
   2852       default: break;
   2853       case ISD::SETLT:
   2854       case ISD::SETGE:
   2855         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
   2856           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
   2857           RHS = DAG.getConstant(C-1, MVT::i32);
   2858         }
   2859         break;
   2860       case ISD::SETULT:
   2861       case ISD::SETUGE:
   2862         if (C != 0 && isLegalICmpImmediate(C-1)) {
   2863           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
   2864           RHS = DAG.getConstant(C-1, MVT::i32);
   2865         }
   2866         break;
   2867       case ISD::SETLE:
   2868       case ISD::SETGT:
   2869         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
   2870           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
   2871           RHS = DAG.getConstant(C+1, MVT::i32);
   2872         }
   2873         break;
   2874       case ISD::SETULE:
   2875       case ISD::SETUGT:
   2876         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
   2877           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
   2878           RHS = DAG.getConstant(C+1, MVT::i32);
   2879         }
   2880         break;
   2881       }
   2882     }
   2883   }
   2884 
   2885   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   2886   ARMISD::NodeType CompareType;
   2887   switch (CondCode) {
   2888   default:
   2889     CompareType = ARMISD::CMP;
   2890     break;
   2891   case ARMCC::EQ:
   2892   case ARMCC::NE:
   2893     // Uses only Z Flag
   2894     CompareType = ARMISD::CMPZ;
   2895     break;
   2896   }
   2897   ARMcc = DAG.getConstant(CondCode, MVT::i32);
   2898   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
   2899 }
   2900 
    2901 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
   2902 SDValue
   2903 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
   2904                              DebugLoc dl) const {
   2905   SDValue Cmp;
   2906   if (!isFloatingPointZero(RHS))
   2907     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
   2908   else
   2909     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
   2910   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
   2911 }
   2912 
   2913 /// duplicateCmp - Glue values can have only one use, so this function
   2914 /// duplicates a comparison node.
   2915 SDValue
   2916 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
   2917   unsigned Opc = Cmp.getOpcode();
   2918   DebugLoc DL = Cmp.getDebugLoc();
   2919   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
   2920     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   2921 
   2922   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
   2923   Cmp = Cmp.getOperand(0);
   2924   Opc = Cmp.getOpcode();
   2925   if (Opc == ARMISD::CMPFP)
   2926     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   2927   else {
   2928     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
   2929     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
   2930   }
   2931   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
   2932 }
   2933 
   2934 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   2935   SDValue Cond = Op.getOperand(0);
   2936   SDValue SelectTrue = Op.getOperand(1);
   2937   SDValue SelectFalse = Op.getOperand(2);
   2938   DebugLoc dl = Op.getDebugLoc();
   2939 
   2940   // Convert:
   2941   //
   2942   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
   2943   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
   2944   //
   2945   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
   2946     const ConstantSDNode *CMOVTrue =
   2947       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
   2948     const ConstantSDNode *CMOVFalse =
   2949       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
   2950 
   2951     if (CMOVTrue && CMOVFalse) {
   2952       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
   2953       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
   2954 
   2955       SDValue True;
   2956       SDValue False;
   2957       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
   2958         True = SelectTrue;
   2959         False = SelectFalse;
   2960       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
   2961         True = SelectFalse;
   2962         False = SelectTrue;
   2963       }
   2964 
   2965       if (True.getNode() && False.getNode()) {
   2966         EVT VT = Op.getValueType();
   2967         SDValue ARMcc = Cond.getOperand(2);
   2968         SDValue CCR = Cond.getOperand(3);
   2969         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
   2970         assert(True.getValueType() == VT);
   2971         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
   2972       }
   2973     }
   2974   }
   2975 
   2976   // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
   2977   // undefined bits before doing a full-word comparison with zero.
   2978   Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
   2979                      DAG.getConstant(1, Cond.getValueType()));
   2980 
   2981   return DAG.getSelectCC(dl, Cond,
   2982                          DAG.getConstant(0, Cond.getValueType()),
   2983                          SelectTrue, SelectFalse, ISD::SETNE);
   2984 }
   2985 
   2986 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   2987   EVT VT = Op.getValueType();
   2988   SDValue LHS = Op.getOperand(0);
   2989   SDValue RHS = Op.getOperand(1);
   2990   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   2991   SDValue TrueVal = Op.getOperand(2);
   2992   SDValue FalseVal = Op.getOperand(3);
   2993   DebugLoc dl = Op.getDebugLoc();
   2994 
   2995   if (LHS.getValueType() == MVT::i32) {
   2996     SDValue ARMcc;
   2997     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   2998     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   2999     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
   3000   }
   3001 
   3002   ARMCC::CondCodes CondCode, CondCode2;
   3003   FPCCToARMCC(CC, CondCode, CondCode2);
   3004 
   3005   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3006   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   3007   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3008   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
   3009                                ARMcc, CCR, Cmp);
   3010   if (CondCode2 != ARMCC::AL) {
   3011     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
   3012     // FIXME: Needs another CMP because flag can have but one use.
   3013     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
   3014     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
   3015                          Result, TrueVal, ARMcc2, CCR, Cmp2);
   3016   }
   3017   return Result;
   3018 }
   3019 
   3020 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
   3021 /// to morph to an integer compare sequence.
   3022 static bool canChangeToInt(SDValue Op, bool &SeenZero,
   3023                            const ARMSubtarget *Subtarget) {
   3024   SDNode *N = Op.getNode();
   3025   if (!N->hasOneUse())
   3026     // Otherwise it requires moving the value from fp to integer registers.
   3027     return false;
   3028   if (!N->getNumValues())
   3029     return false;
   3030   EVT VT = Op.getValueType();
   3031   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
    3032     // The f32 case is generally profitable. The f64 case only makes sense when
    3033     // vcmpe + vmrs are very slow, e.g. on Cortex-A8.
   3034     return false;
   3035 
   3036   if (isFloatingPointZero(Op)) {
   3037     SeenZero = true;
   3038     return true;
   3039   }
   3040   return ISD::isNormalLoad(N);
   3041 }
   3042 
   3043 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
   3044   if (isFloatingPointZero(Op))
   3045     return DAG.getConstant(0, MVT::i32);
   3046 
   3047   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
   3048     return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   3049                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
   3050                        Ld->isVolatile(), Ld->isNonTemporal(),
   3051                        Ld->isInvariant(), Ld->getAlignment());
   3052 
   3053   llvm_unreachable("Unknown VFP cmp argument!");
   3054 }
   3055 
   3056 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
   3057                            SDValue &RetVal1, SDValue &RetVal2) {
   3058   if (isFloatingPointZero(Op)) {
   3059     RetVal1 = DAG.getConstant(0, MVT::i32);
   3060     RetVal2 = DAG.getConstant(0, MVT::i32);
   3061     return;
   3062   }
   3063 
   3064   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
   3065     SDValue Ptr = Ld->getBasePtr();
   3066     RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   3067                           Ld->getChain(), Ptr,
   3068                           Ld->getPointerInfo(),
   3069                           Ld->isVolatile(), Ld->isNonTemporal(),
   3070                           Ld->isInvariant(), Ld->getAlignment());
   3071 
   3072     EVT PtrType = Ptr.getValueType();
   3073     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
   3074     SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
   3075                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
   3076     RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   3077                           Ld->getChain(), NewPtr,
   3078                           Ld->getPointerInfo().getWithOffset(4),
   3079                           Ld->isVolatile(), Ld->isNonTemporal(),
   3080                           Ld->isInvariant(), NewAlign);
   3081     return;
   3082   }
   3083 
   3084   llvm_unreachable("Unknown VFP cmp argument!");
   3085 }
   3086 
   3087 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
   3088 /// f32 and even f64 comparisons to integer ones.
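         /// For example (sketch only): "x == 0.0f", where x comes from a plain load,
         /// can become an integer compare of the loaded bits with the sign bit masked
         /// off, so +0.0 and -0.0 still compare equal.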
   3089 SDValue
   3090 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
   3091   SDValue Chain = Op.getOperand(0);
   3092   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   3093   SDValue LHS = Op.getOperand(2);
   3094   SDValue RHS = Op.getOperand(3);
   3095   SDValue Dest = Op.getOperand(4);
   3096   DebugLoc dl = Op.getDebugLoc();
   3097 
   3098   bool LHSSeenZero = false;
   3099   bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
   3100   bool RHSSeenZero = false;
   3101   bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
   3102   if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
   3103     // If unsafe fp math optimization is enabled and there are no other uses of
   3104     // the CMP operands, and the condition code is EQ or NE, we can optimize it
   3105     // to an integer comparison.
   3106     if (CC == ISD::SETOEQ)
   3107       CC = ISD::SETEQ;
   3108     else if (CC == ISD::SETUNE)
   3109       CC = ISD::SETNE;
   3110 
   3111     SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
   3112     SDValue ARMcc;
   3113     if (LHS.getValueType() == MVT::f32) {
   3114       LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
   3115                         bitcastf32Toi32(LHS, DAG), Mask);
   3116       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
   3117                         bitcastf32Toi32(RHS, DAG), Mask);
   3118       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   3119       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3120       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   3121                          Chain, Dest, ARMcc, CCR, Cmp);
   3122     }
   3123 
   3124     SDValue LHS1, LHS2;
   3125     SDValue RHS1, RHS2;
   3126     expandf64Toi32(LHS, DAG, LHS1, LHS2);
   3127     expandf64Toi32(RHS, DAG, RHS1, RHS2);
   3128     LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
   3129     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
   3130     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   3131     ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3132     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   3133     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
   3134     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
   3135   }
   3136 
   3137   return SDValue();
   3138 }
   3139 
   3140 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   3141   SDValue Chain = Op.getOperand(0);
   3142   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   3143   SDValue LHS = Op.getOperand(2);
   3144   SDValue RHS = Op.getOperand(3);
   3145   SDValue Dest = Op.getOperand(4);
   3146   DebugLoc dl = Op.getDebugLoc();
   3147 
   3148   if (LHS.getValueType() == MVT::i32) {
   3149     SDValue ARMcc;
   3150     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   3151     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3152     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   3153                        Chain, Dest, ARMcc, CCR, Cmp);
   3154   }
   3155 
   3156   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
   3157 
   3158   if (getTargetMachine().Options.UnsafeFPMath &&
   3159       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
   3160        CC == ISD::SETNE || CC == ISD::SETUNE)) {
   3161     SDValue Result = OptimizeVFPBrcond(Op, DAG);
   3162     if (Result.getNode())
   3163       return Result;
   3164   }
   3165 
   3166   ARMCC::CondCodes CondCode, CondCode2;
   3167   FPCCToARMCC(CC, CondCode, CondCode2);
   3168 
   3169   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3170   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   3171   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3172   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   3173   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
   3174   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   3175   if (CondCode2 != ARMCC::AL) {
   3176     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
   3177     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
   3178     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   3179   }
   3180   return Res;
   3181 }
   3182 
   3183 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   3184   SDValue Chain = Op.getOperand(0);
   3185   SDValue Table = Op.getOperand(1);
   3186   SDValue Index = Op.getOperand(2);
   3187   DebugLoc dl = Op.getDebugLoc();
   3188 
   3189   EVT PTy = getPointerTy();
   3190   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
   3191   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
   3192   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
   3193   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
   3194   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
   3195   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
   3196   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
   3197   if (Subtarget->isThumb2()) {
   3198     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
   3199     // which does another jump to the destination. This also makes it easier
   3200     // to translate it to TBB / TBH later.
   3201     // FIXME: This might not work if the function is extremely large.
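             // Sketch: this typically becomes a TBB/TBH (e.g. "tbb [pc, rIdx]") over a
             // table of byte/halfword offsets rather than a load plus a register branch.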
   3202     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
   3203                        Addr, Op.getOperand(2), JTI, UId);
   3204   }
   3205   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
   3206     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
   3207                        MachinePointerInfo::getJumpTable(),
   3208                        false, false, false, 0);
   3209     Chain = Addr.getValue(1);
   3210     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
   3211     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   3212   } else {
   3213     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
   3214                        MachinePointerInfo::getJumpTable(),
   3215                        false, false, false, 0);
   3216     Chain = Addr.getValue(1);
   3217     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   3218   }
   3219 }
   3220 
   3221 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   3222   EVT VT = Op.getValueType();
   3223   DebugLoc dl = Op.getDebugLoc();
   3224 
   3225   if (Op.getValueType().getVectorElementType() == MVT::i32) {
   3226     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
   3227       return Op;
   3228     return DAG.UnrollVectorOp(Op.getNode());
   3229   }
   3230 
   3231   assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
   3232          "Invalid type for custom lowering!");
   3233   if (VT != MVT::v4i16)
   3234     return DAG.UnrollVectorOp(Op.getNode());
   3235 
   3236   Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
   3237   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
   3238 }
   3239 
   3240 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   3241   EVT VT = Op.getValueType();
   3242   if (VT.isVector())
   3243     return LowerVectorFP_TO_INT(Op, DAG);
   3244 
   3245   DebugLoc dl = Op.getDebugLoc();
   3246   unsigned Opc;
   3247 
   3248   switch (Op.getOpcode()) {
   3249   default: llvm_unreachable("Invalid opcode!");
   3250   case ISD::FP_TO_SINT:
   3251     Opc = ARMISD::FTOSI;
   3252     break;
   3253   case ISD::FP_TO_UINT:
   3254     Opc = ARMISD::FTOUI;
   3255     break;
   3256   }
   3257   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
   3258   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
   3259 }
   3260 
   3261 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   3262   EVT VT = Op.getValueType();
   3263   DebugLoc dl = Op.getDebugLoc();
   3264 
   3265   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
   3266     if (VT.getVectorElementType() == MVT::f32)
   3267       return Op;
   3268     return DAG.UnrollVectorOp(Op.getNode());
   3269   }
   3270 
   3271   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
   3272          "Invalid type for custom lowering!");
   3273   if (VT != MVT::v4f32)
   3274     return DAG.UnrollVectorOp(Op.getNode());
   3275 
   3276   unsigned CastOpc;
   3277   unsigned Opc;
   3278   switch (Op.getOpcode()) {
   3279   default: llvm_unreachable("Invalid opcode!");
   3280   case ISD::SINT_TO_FP:
   3281     CastOpc = ISD::SIGN_EXTEND;
   3282     Opc = ISD::SINT_TO_FP;
   3283     break;
   3284   case ISD::UINT_TO_FP:
   3285     CastOpc = ISD::ZERO_EXTEND;
   3286     Opc = ISD::UINT_TO_FP;
   3287     break;
   3288   }
   3289 
   3290   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
   3291   return DAG.getNode(Opc, dl, VT, Op);
   3292 }
   3293 
   3294 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   3295   EVT VT = Op.getValueType();
   3296   if (VT.isVector())
   3297     return LowerVectorINT_TO_FP(Op, DAG);
   3298 
   3299   DebugLoc dl = Op.getDebugLoc();
   3300   unsigned Opc;
   3301 
   3302   switch (Op.getOpcode()) {
   3303   default: llvm_unreachable("Invalid opcode!");
   3304   case ISD::SINT_TO_FP:
   3305     Opc = ARMISD::SITOF;
   3306     break;
   3307   case ISD::UINT_TO_FP:
   3308     Opc = ARMISD::UITOF;
   3309     break;
   3310   }
   3311 
   3312   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   3313   return DAG.getNode(Opc, dl, VT, Op);
   3314 }
   3315 
   3316 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
    3317   // Implement fcopysign by transferring the sign bit with bitwise operations.
   3318   SDValue Tmp0 = Op.getOperand(0);
   3319   SDValue Tmp1 = Op.getOperand(1);
   3320   DebugLoc dl = Op.getDebugLoc();
   3321   EVT VT = Op.getValueType();
   3322   EVT SrcVT = Tmp1.getValueType();
   3323   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
   3324     Tmp0.getOpcode() == ARMISD::VMOVDRR;
   3325   bool UseNEON = !InGPR && Subtarget->hasNEON();
   3326 
   3327   if (UseNEON) {
   3328     // Use VBSL to copy the sign bit.
   3329     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
   3330     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
   3331                                DAG.getTargetConstant(EncodedVal, MVT::i32));
   3332     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
   3333     if (VT == MVT::f64)
   3334       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3335                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
   3336                          DAG.getConstant(32, MVT::i32));
   3337     else /*if (VT == MVT::f32)*/
   3338       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
   3339     if (SrcVT == MVT::f32) {
   3340       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
   3341       if (VT == MVT::f64)
   3342         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3343                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
   3344                            DAG.getConstant(32, MVT::i32));
   3345     } else if (VT == MVT::f32)
   3346       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
   3347                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
   3348                          DAG.getConstant(32, MVT::i32));
   3349     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
   3350     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
   3351 
   3352     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
   3353                                             MVT::i32);
   3354     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
   3355     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
   3356                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
   3357 
   3358     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
   3359                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
   3360                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
   3361     if (VT == MVT::f32) {
   3362       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
   3363       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
   3364                         DAG.getConstant(0, MVT::i32));
   3365     } else {
   3366       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
   3367     }
   3368 
   3369     return Res;
   3370   }
   3371 
   3372   // Bitcast operand 1 to i32.
   3373   if (SrcVT == MVT::f64)
   3374     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3375                        &Tmp1, 1).getValue(1);
   3376   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
   3377 
   3378   // Or in the signbit with integer operations.
   3379   SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
   3380   SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
   3381   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
   3382   if (VT == MVT::f32) {
   3383     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
   3384                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
   3385     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
   3386                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   3387   }
   3388 
    3389   // f64: OR the sign bit into the high part, then combine the two parts.
   3390   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3391                      &Tmp0, 1);
   3392   SDValue Lo = Tmp0.getValue(0);
   3393   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
   3394   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
   3395   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   3396 }
   3397 
   3398 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   3399   MachineFunction &MF = DAG.getMachineFunction();
   3400   MachineFrameInfo *MFI = MF.getFrameInfo();
   3401   MFI->setReturnAddressIsTaken(true);
   3402 
   3403   EVT VT = Op.getValueType();
   3404   DebugLoc dl = Op.getDebugLoc();
   3405   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3406   if (Depth) {
   3407     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
   3408     SDValue Offset = DAG.getConstant(4, MVT::i32);
   3409     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
   3410                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
   3411                        MachinePointerInfo(), false, false, false, 0);
   3412   }
   3413 
   3414   // Return LR, which contains the return address. Mark it an implicit live-in.
   3415   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   3416   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   3417 }
   3418 
   3419 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   3420   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   3421   MFI->setFrameAddressIsTaken(true);
   3422 
   3423   EVT VT = Op.getValueType();
   3424   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   3425   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3426   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
   3427     ? ARM::R7 : ARM::R11;
   3428   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   3429   while (Depth--)
   3430     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
   3431                             MachinePointerInfo(),
   3432                             false, false, false, 0);
   3433   return FrameAddr;
   3434 }
   3435 
   3436 /// ExpandBITCAST - If the target supports VFP, this function is called to
   3437 /// expand a bit convert where either the source or destination type is i64 to
   3438 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
   3439 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
   3440 /// vectors), since the legalizer won't know what to do with that.
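        /// For example, (i64 (bitcast (f64 X))) becomes roughly
        ///   (build_pair (VMOVRRD X):0, (VMOVRRD X):1)
        /// and the i64->f64 direction pairs the two i32 halves with VMOVDRR.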
   3441 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
   3442   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   3443   DebugLoc dl = N->getDebugLoc();
   3444   SDValue Op = N->getOperand(0);
   3445 
   3446   // This function is only supposed to be called for i64 types, either as the
   3447   // source or destination of the bit convert.
   3448   EVT SrcVT = Op.getValueType();
   3449   EVT DstVT = N->getValueType(0);
   3450   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
   3451          "ExpandBITCAST called for non-i64 type");
   3452 
   3453   // Turn i64->f64 into VMOVDRR.
   3454   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
   3455     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3456                              DAG.getConstant(0, MVT::i32));
   3457     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3458                              DAG.getConstant(1, MVT::i32));
   3459     return DAG.getNode(ISD::BITCAST, dl, DstVT,
   3460                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
   3461   }
   3462 
   3463   // Turn f64->i64 into VMOVRRD.
   3464   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
   3465     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
   3466                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
   3467     // Merge the pieces into a single i64 value.
   3468     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
   3469   }
   3470 
   3471   return SDValue();
   3472 }
   3473 
   3474 /// getZeroVector - Returns a vector of specified type with all zero elements.
   3475 /// Zero vectors are used to represent vector negation and in those cases
   3476 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
   3477 /// not support i64 elements, so sometimes the zero vectors will need to be
   3478 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
   3479 /// zero vector.
   3480 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   3481   assert(VT.isVector() && "Expected a vector type");
   3482   // The canonical modified immediate encoding of a zero vector is....0!
   3483   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
   3484   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
   3485   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
   3486   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   3487 }
   3488 
   3489 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
   3490 /// i32 values and take a 2 x i32 value to shift plus a shift amount.
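        /// A rough sketch of the sequence built here (for 32-bit parts):
        ///   Lo = (ShAmt - 32 >= 0) ? (ShOpHi >> (ShAmt - 32))
        ///                          : (ShOpLo >> ShAmt) | (ShOpHi << (32 - ShAmt));
        ///   Hi = ShOpHi >> ShAmt;   // arithmetic or logical, matching the opcode
        /// The select is materialized as a compare against zero plus ARMISD::CMOV.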
   3491 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   3492                                                 SelectionDAG &DAG) const {
   3493   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3494   EVT VT = Op.getValueType();
   3495   unsigned VTBits = VT.getSizeInBits();
   3496   DebugLoc dl = Op.getDebugLoc();
   3497   SDValue ShOpLo = Op.getOperand(0);
   3498   SDValue ShOpHi = Op.getOperand(1);
   3499   SDValue ShAmt  = Op.getOperand(2);
   3500   SDValue ARMcc;
   3501   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
   3502 
   3503   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
   3504 
   3505   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3506                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3507   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
   3508   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3509                                    DAG.getConstant(VTBits, MVT::i32));
   3510   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
   3511   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3512   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
   3513 
   3514   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3515   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3516                           ARMcc, DAG, dl);
   3517   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
   3518   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
   3519                            CCR, Cmp);
   3520 
   3521   SDValue Ops[2] = { Lo, Hi };
   3522   return DAG.getMergeValues(Ops, 2, dl);
   3523 }
   3524 
   3525 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
   3526 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
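        /// A rough sketch of the sequence built here (for 32-bit parts):
        ///   Hi = (ShAmt - 32 >= 0) ? (ShOpLo << (ShAmt - 32))
        ///                          : (ShOpLo >> (32 - ShAmt)) | (ShOpHi << ShAmt);
        ///   Lo = ShOpLo << ShAmt;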
   3527 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   3528                                                SelectionDAG &DAG) const {
   3529   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3530   EVT VT = Op.getValueType();
   3531   unsigned VTBits = VT.getSizeInBits();
   3532   DebugLoc dl = Op.getDebugLoc();
   3533   SDValue ShOpLo = Op.getOperand(0);
   3534   SDValue ShOpHi = Op.getOperand(1);
   3535   SDValue ShAmt  = Op.getOperand(2);
   3536   SDValue ARMcc;
   3537 
   3538   assert(Op.getOpcode() == ISD::SHL_PARTS);
   3539   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3540                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3541   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
   3542   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3543                                    DAG.getConstant(VTBits, MVT::i32));
   3544   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
   3545   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
   3546 
   3547   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3548   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3549   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3550                           ARMcc, DAG, dl);
   3551   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
   3552   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
   3553                            CCR, Cmp);
   3554 
   3555   SDValue Ops[2] = { Lo, Hi };
   3556   return DAG.getMergeValues(Ops, 2, dl);
   3557 }
   3558 
   3559 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   3560                                             SelectionDAG &DAG) const {
   3561   // The rounding mode is in bits 23:22 of the FPSCR.
   3562   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
   3563   // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
   3564   // so that the shift and the mask can be folded into a bitfield extract.
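          // For example, if FPSCR selects round-toward-minus-infinity (RMode
          // 0b10), then ((FPSCR + (1 << 22)) >> 22) & 3 == 3, which is the
          // FLT_ROUNDS encoding for rounding toward negative infinity.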
   3565   DebugLoc dl = Op.getDebugLoc();
   3566   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
   3567                               DAG.getConstant(Intrinsic::arm_get_fpscr,
   3568                                               MVT::i32));
   3569   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
   3570                                   DAG.getConstant(1U << 22, MVT::i32));
   3571   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
   3572                               DAG.getConstant(22, MVT::i32));
   3573   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
   3574                      DAG.getConstant(3, MVT::i32));
   3575 }
   3576 
   3577 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
   3578                          const ARMSubtarget *ST) {
   3579   EVT VT = N->getValueType(0);
   3580   DebugLoc dl = N->getDebugLoc();
   3581 
   3582   if (!ST->hasV6T2Ops())
   3583     return SDValue();
   3584 
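          // On v6T2+ targets cttz(x) is computed as ctlz(rbit(x)): bit-reversing
          // the value turns trailing zeros into leading zeros for CLZ to count.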
   3585   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
   3586   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
   3587 }
   3588 
   3589 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
   3590 /// for each 16-bit element of the operand, repeated.  The basic idea is to
   3591 /// leverage vcnt to get the 8-bit counts, then gather and add the results.
   3592 ///
   3593 /// Trace for v4i16:
   3594 /// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
   3595 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
   3596 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
   3597 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
   3598 ///            [b0 b1 b2 b3 b4 b5 b6 b7]
   3599 ///           +[b1 b0 b3 b2 b5 b4 b7 b6]
   3600 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
   3601 /// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
   3602 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
   3603   EVT VT = N->getValueType(0);
   3604   DebugLoc DL = N->getDebugLoc();
   3605 
   3606   EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
   3607   SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
   3608   SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
   3609   SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
   3610   SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
   3611   return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
   3612 }
   3613 
   3614 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
   3615 /// bit-count for each 16-bit element from the operand.  We need slightly
   3616 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
   3617 /// 64/128-bit registers.
   3618 ///
   3619 /// Trace for v4i16:
   3620 /// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
   3621 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
   3622 /// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
   3623 /// v4i16:Extracted = [k0    k1    k2    k3    ]
   3624 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
   3625   EVT VT = N->getValueType(0);
   3626   DebugLoc DL = N->getDebugLoc();
   3627 
   3628   SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
   3629   if (VT.is64BitVector()) {
   3630     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
   3631     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
   3632                        DAG.getIntPtrConstant(0));
   3633   } else {
   3634     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
   3635                                     BitCounts, DAG.getIntPtrConstant(0));
   3636     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
   3637   }
   3638 }
   3639 
   3640 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
   3641 /// bit-count for each 32-bit element from the operand.  The idea here is
   3642 /// to split the vector into 16-bit elements, leverage the 16-bit count
   3643 /// routine, and then combine the results.
   3644 ///
   3645 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
   3646 /// input    = [v0    v1    ] (vi: 32-bit elements)
   3647 /// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
   3648 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
   3649 /// vrev: N0 = [k1 k0 k3 k2 ]
   3650 ///            [k0 k1 k2 k3 ]
   3651 ///       N1 =+[k1 k0 k3 k2 ]
   3652 ///            [k0 k2 k1 k3 ]
   3653 ///       N2 =+[k1 k3 k0 k2 ]
   3654 ///            [k0    k2    k1    k3    ]
   3655 /// Extended =+[k1    k3    k0    k2    ]
   3656 ///            [k0    k2    ]
   3657 /// Extracted=+[k1    k3    ]
   3658 ///
   3659 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
   3660   EVT VT = N->getValueType(0);
   3661   DebugLoc DL = N->getDebugLoc();
   3662 
   3663   EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
   3664 
   3665   SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
   3666   SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
   3667   SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
   3668   SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
   3669   SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
   3670 
   3671   if (VT.is64BitVector()) {
   3672     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
   3673     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
   3674                        DAG.getIntPtrConstant(0));
   3675   } else {
   3676     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
   3677                                     DAG.getIntPtrConstant(0));
   3678     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
   3679   }
   3680 }
   3681 
   3682 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
   3683                           const ARMSubtarget *ST) {
   3684   EVT VT = N->getValueType(0);
   3685 
   3686   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
   3687   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
   3688           VT == MVT::v4i16 || VT == MVT::v8i16) &&
   3689          "Unexpected type for custom ctpop lowering");
   3690 
   3691   if (VT.getVectorElementType() == MVT::i32)
   3692     return lowerCTPOP32BitElements(N, DAG);
   3693   else
   3694     return lowerCTPOP16BitElements(N, DAG);
   3695 }
   3696 
   3697 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
   3698                           const ARMSubtarget *ST) {
   3699   EVT VT = N->getValueType(0);
   3700   DebugLoc dl = N->getDebugLoc();
   3701 
   3702   if (!VT.isVector())
   3703     return SDValue();
   3704 
   3705   // Lower vector shifts on NEON to use VSHL.
   3706   assert(ST->hasNEON() && "unexpected vector shift");
   3707 
   3708   // Left shifts translate directly to the vshiftu intrinsic.
   3709   if (N->getOpcode() == ISD::SHL)
   3710     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3711                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
   3712                        N->getOperand(0), N->getOperand(1));
   3713 
   3714   assert((N->getOpcode() == ISD::SRA ||
   3715           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
   3716 
   3717   // NEON uses the same intrinsics for both left and right shifts.  For
   3718   // right shifts, the shift amounts are negative, so negate the vector of
   3719   // shift amounts.
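          // For example, a logical right shift of a v4i32 by the splat
          // <4, 4, 4, 4> becomes vshiftu(X, <-4, -4, -4, -4>), which NEON's
          // shift instructions interpret as a right shift by 4.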
   3720   EVT ShiftVT = N->getOperand(1).getValueType();
   3721   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
   3722                                      getZeroVector(ShiftVT, DAG, dl),
   3723                                      N->getOperand(1));
   3724   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
   3725                              Intrinsic::arm_neon_vshifts :
   3726                              Intrinsic::arm_neon_vshiftu);
   3727   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3728                      DAG.getConstant(vshiftInt, MVT::i32),
   3729                      N->getOperand(0), NegatedCount);
   3730 }
   3731 
   3732 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
   3733                                 const ARMSubtarget *ST) {
   3734   EVT VT = N->getValueType(0);
   3735   DebugLoc dl = N->getDebugLoc();
   3736 
   3737   // We can get here for a node like i32 = ISD::SHL i32, i64
   3738   if (VT != MVT::i64)
   3739     return SDValue();
   3740 
   3741   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
   3742          "Unknown shift to lower!");
   3743 
   3744   // We only lower SRA, SRL of 1 here; all others use generic lowering.
   3745   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
   3746       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
   3747     return SDValue();
   3748 
   3749   // If we are in Thumb1 mode, we don't have RRX.
   3750   if (ST->isThumb1Only()) return SDValue();
   3751 
   3752   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
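          // For example, an i64 logical shift right by 1 ends up roughly as:
          //   MOVS Hi, Hi, LSR #1   @ carry := old bit 0 of Hi
          //   MOV  Lo, Lo, RRX      @ Lo >>= 1, carry shifted into bit 31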
   3753   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
   3754                            DAG.getConstant(0, MVT::i32));
   3755   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
   3756                            DAG.getConstant(1, MVT::i32));
   3757 
   3758   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one
   3759   // and captures the shifted-out bit in the carry flag.
   3760   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
   3761   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
   3762 
   3763   // The low part is an ARMISD::RRX operand, which shifts the carry in.
   3764   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
   3765 
   3766   // Merge the pieces into a single i64 value.
   3767   return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
   3768 }
   3769 
   3770 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   3771   SDValue TmpOp0, TmpOp1;
   3772   bool Invert = false;
   3773   bool Swap = false;
   3774   unsigned Opc = 0;
   3775 
   3776   SDValue Op0 = Op.getOperand(0);
   3777   SDValue Op1 = Op.getOperand(1);
   3778   SDValue CC = Op.getOperand(2);
   3779   EVT VT = Op.getValueType();
   3780   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   3781   DebugLoc dl = Op.getDebugLoc();
   3782 
   3783   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
   3784     switch (SetCCOpcode) {
   3785     default: llvm_unreachable("Illegal FP comparison");
   3786     case ISD::SETUNE:
   3787     case ISD::SETNE:  Invert = true; // Fallthrough
   3788     case ISD::SETOEQ:
   3789     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
   3790     case ISD::SETOLT:
   3791     case ISD::SETLT: Swap = true; // Fallthrough
   3792     case ISD::SETOGT:
   3793     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
   3794     case ISD::SETOLE:
   3795     case ISD::SETLE:  Swap = true; // Fallthrough
   3796     case ISD::SETOGE:
   3797     case ISD::SETGE: Opc = ARMISD::VCGE; break;
   3798     case ISD::SETUGE: Swap = true; // Fallthrough
   3799     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
   3800     case ISD::SETUGT: Swap = true; // Fallthrough
   3801     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
   3802     case ISD::SETUEQ: Invert = true; // Fallthrough
   3803     case ISD::SETONE:
   3804       // Expand this to (OLT | OGT).
   3805       TmpOp0 = Op0;
   3806       TmpOp1 = Op1;
   3807       Opc = ISD::OR;
   3808       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
   3809       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
   3810       break;
   3811     case ISD::SETUO: Invert = true; // Fallthrough
   3812     case ISD::SETO:
   3813       // Expand this to (OLT | OGE).
   3814       TmpOp0 = Op0;
   3815       TmpOp1 = Op1;
   3816       Opc = ISD::OR;
   3817       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
   3818       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
   3819       break;
   3820     }
   3821   } else {
   3822     // Integer comparisons.
   3823     switch (SetCCOpcode) {
   3824     default: llvm_unreachable("Illegal integer comparison");
   3825     case ISD::SETNE:  Invert = true; // Fallthrough
   3826     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
   3827     case ISD::SETLT:  Swap = true; // Fallthrough
   3828     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
   3829     case ISD::SETLE:  Swap = true; // Fallthrough
   3830     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
   3831     case ISD::SETULT: Swap = true; // Fallthrough
   3832     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
   3833     case ISD::SETULE: Swap = true; // Fallthrough
   3834     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
   3835     }
   3836 
   3837     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
   3838     if (Opc == ARMISD::VCEQ) {
   3839 
   3840       SDValue AndOp;
   3841       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
   3842         AndOp = Op0;
   3843       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
   3844         AndOp = Op1;
   3845 
   3846       // Ignore bitconvert.
   3847       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
   3848         AndOp = AndOp.getOperand(0);
   3849 
   3850       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
   3851         Opc = ARMISD::VTST;
   3852         Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
   3853         Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
   3854         Invert = !Invert;
   3855       }
   3856     }
   3857   }
   3858 
   3859   if (Swap)
   3860     std::swap(Op0, Op1);
   3861 
   3862   // If one of the operands is a constant vector zero, attempt to fold the
   3863   // comparison to a specialized compare-against-zero form.
   3864   SDValue SingleOp;
   3865   if (ISD::isBuildVectorAllZeros(Op1.getNode()))
   3866     SingleOp = Op0;
   3867   else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
   3868     if (Opc == ARMISD::VCGE)
   3869       Opc = ARMISD::VCLEZ;
   3870     else if (Opc == ARMISD::VCGT)
   3871       Opc = ARMISD::VCLTZ;
   3872     SingleOp = Op1;
   3873   }
   3874 
   3875   SDValue Result;
   3876   if (SingleOp.getNode()) {
   3877     switch (Opc) {
   3878     case ARMISD::VCEQ:
   3879       Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
   3880     case ARMISD::VCGE:
   3881       Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
   3882     case ARMISD::VCLEZ:
   3883       Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
   3884     case ARMISD::VCGT:
   3885       Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
   3886     case ARMISD::VCLTZ:
   3887       Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
   3888     default:
   3889       Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
   3890     }
   3891   } else {
   3892      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
   3893   }
   3894 
   3895   if (Invert)
   3896     Result = DAG.getNOT(dl, Result, VT);
   3897 
   3898   return Result;
   3899 }
   3900 
   3901 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
   3902 /// valid vector constant for a NEON instruction with a "modified immediate"
   3903 /// operand (e.g., VMOV).  If so, return the encoded value.
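        /// For example, a v4i16 splat of 0x5500 is encodable (Cmode=101x, Imm=0x55),
        /// whereas a splat of 0x5555 has two nonzero bytes and is rejected.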
   3904 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
   3905                                  unsigned SplatBitSize, SelectionDAG &DAG,
   3906                                  EVT &VT, bool is128Bits, NEONModImmType type) {
   3907   unsigned OpCmode, Imm;
   3908 
   3909   // SplatBitSize is set to the smallest size that splats the vector, so a
   3910   // zero vector will always have SplatBitSize == 8.  However, NEON modified
   3911   // immediate instructions other than VMOV do not support the 8-bit encoding
   3912   // of a zero vector, and the default encoding of zero is supposed to be the
   3913   // 32-bit version.
   3914   if (SplatBits == 0)
   3915     SplatBitSize = 32;
   3916 
   3917   switch (SplatBitSize) {
   3918   case 8:
   3919     if (type != VMOVModImm)
   3920       return SDValue();
   3921     // Any 1-byte value is OK.  Op=0, Cmode=1110.
   3922     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
   3923     OpCmode = 0xe;
   3924     Imm = SplatBits;
   3925     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
   3926     break;
   3927 
   3928   case 16:
   3929     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
   3930     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
   3931     if ((SplatBits & ~0xff) == 0) {
   3932       // Value = 0x00nn: Op=x, Cmode=100x.
   3933       OpCmode = 0x8;
   3934       Imm = SplatBits;
   3935       break;
   3936     }
   3937     if ((SplatBits & ~0xff00) == 0) {
   3938       // Value = 0xnn00: Op=x, Cmode=101x.
   3939       OpCmode = 0xa;
   3940       Imm = SplatBits >> 8;
   3941       break;
   3942     }
   3943     return SDValue();
   3944 
   3945   case 32:
   3946     // NEON's 32-bit VMOV supports splat values where:
   3947     // * only one byte is nonzero, or
   3948     // * the least significant byte is 0xff and the second byte is nonzero, or
   3949     // * the least significant 2 bytes are 0xff and the third is nonzero.
   3950     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
   3951     if ((SplatBits & ~0xff) == 0) {
   3952       // Value = 0x000000nn: Op=x, Cmode=000x.
   3953       OpCmode = 0;
   3954       Imm = SplatBits;
   3955       break;
   3956     }
   3957     if ((SplatBits & ~0xff00) == 0) {
   3958       // Value = 0x0000nn00: Op=x, Cmode=001x.
   3959       OpCmode = 0x2;
   3960       Imm = SplatBits >> 8;
   3961       break;
   3962     }
   3963     if ((SplatBits & ~0xff0000) == 0) {
   3964       // Value = 0x00nn0000: Op=x, Cmode=010x.
   3965       OpCmode = 0x4;
   3966       Imm = SplatBits >> 16;
   3967       break;
   3968     }
   3969     if ((SplatBits & ~0xff000000) == 0) {
   3970       // Value = 0xnn000000: Op=x, Cmode=011x.
   3971       OpCmode = 0x6;
   3972       Imm = SplatBits >> 24;
   3973       break;
   3974     }
   3975 
   3976     // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
   3977     if (type == OtherModImm) return SDValue();
   3978 
   3979     if ((SplatBits & ~0xffff) == 0 &&
   3980         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
   3981       // Value = 0x0000nnff: Op=x, Cmode=1100.
   3982       OpCmode = 0xc;
   3983       Imm = SplatBits >> 8;
   3984       SplatBits |= 0xff;
   3985       break;
   3986     }
   3987 
   3988     if ((SplatBits & ~0xffffff) == 0 &&
   3989         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
   3990       // Value = 0x00nnffff: Op=x, Cmode=1101.
   3991       OpCmode = 0xd;
   3992       Imm = SplatBits >> 16;
   3993       SplatBits |= 0xffff;
   3994       break;
   3995     }
   3996 
   3997     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
   3998     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
   3999     // VMOV.I32.  A (very) minor optimization would be to replicate the value
   4000     // and fall through here to test for a valid 64-bit splat.  But, then the
   4001     // caller would also need to check and handle the change in size.
   4002     return SDValue();
   4003 
   4004   case 64: {
   4005     if (type != VMOVModImm)
   4006       return SDValue();
   4007     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
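            // For example, the splat 0x00ff00ff00ff00ff encodes as Imm = 0b01010101.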
   4008     uint64_t BitMask = 0xff;
   4009     uint64_t Val = 0;
   4010     unsigned ImmMask = 1;
   4011     Imm = 0;
   4012     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
   4013       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
   4014         Val |= BitMask;
   4015         Imm |= ImmMask;
   4016       } else if ((SplatBits & BitMask) != 0) {
   4017         return SDValue();
   4018       }
   4019       BitMask <<= 8;
   4020       ImmMask <<= 1;
   4021     }
   4022     // Op=1, Cmode=1110.
   4023     OpCmode = 0x1e;
   4024     SplatBits = Val;
   4025     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
   4026     break;
   4027   }
   4028 
   4029   default:
   4030     llvm_unreachable("unexpected size for isNEONModifiedImm");
   4031   }
   4032 
   4033   unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
   4034   return DAG.getTargetConstant(EncodedVal, MVT::i32);
   4035 }
   4036 
   4037 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
   4038                                            const ARMSubtarget *ST) const {
   4039   if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
   4040     return SDValue();
   4041 
   4042   ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
   4043   assert(Op.getValueType() == MVT::f32 &&
   4044          "ConstantFP custom lowering should only occur for f32.");
   4045 
   4046   // Try splatting with a VMOV.f32...
   4047   APFloat FPVal = CFP->getValueAPF();
   4048   int ImmVal = ARM_AM::getFP32Imm(FPVal);
   4049   if (ImmVal != -1) {
   4050     DebugLoc DL = Op.getDebugLoc();
   4051     SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
   4052     SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
   4053                                       NewVal);
   4054     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
   4055                        DAG.getConstant(0, MVT::i32));
   4056   }
   4057 
   4058   // If that fails, try a VMOV.i32
   4059   EVT VMovVT;
   4060   unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
   4061   SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
   4062                                      VMOVModImm);
   4063   if (NewVal != SDValue()) {
   4064     DebugLoc DL = Op.getDebugLoc();
   4065     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
   4066                                       NewVal);
   4067     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
   4068                                        VecConstant);
   4069     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
   4070                        DAG.getConstant(0, MVT::i32));
   4071   }
   4072 
   4073   // Finally, try a VMVN.i32
   4074   NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
   4075                              VMVNModImm);
   4076   if (NewVal != SDValue()) {
   4077     DebugLoc DL = Op.getDebugLoc();
   4078     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
   4079     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
   4080                                        VecConstant);
   4081     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
   4082                        DAG.getConstant(0, MVT::i32));
   4083   }
   4084 
   4085   return SDValue();
   4086 }
   4087 
   4088 // Check if a VEXT instruction can handle the shuffle mask when the
   4089 // vector sources of the shuffle are the same.
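        // For example, for a single v8i8 source the mask <3, 4, 5, 6, 7, 0, 1, 2>
        // is accepted with Imm = 3 (indices wrap around at NumElts).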
   4090 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
   4091   unsigned NumElts = VT.getVectorNumElements();
   4092 
   4093   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
   4094   if (M[0] < 0)
   4095     return false;
   4096 
   4097   Imm = M[0];
   4098 
   4099   // If this is a VEXT shuffle, the immediate value is the index of the first
   4100   // element.  The other shuffle indices must be the successive elements after
   4101   // the first one.
   4102   unsigned ExpectedElt = Imm;
   4103   for (unsigned i = 1; i < NumElts; ++i) {
   4104     // Increment the expected index.  If it wraps around, just follow it
   4105     // back to index zero and keep going.
   4106     ++ExpectedElt;
   4107     if (ExpectedElt == NumElts)
   4108       ExpectedElt = 0;
   4109 
   4110     if (M[i] < 0) continue; // ignore UNDEF indices
   4111     if (ExpectedElt != static_cast<unsigned>(M[i]))
   4112       return false;
   4113   }
   4114 
   4115   return true;
   4116 }
   4117 
   4118 
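        // isVEXTMask - Check whether a shuffle mask corresponds to a two-operand
        // VEXT.  For example, with v8i8 operands <6, 7, 8, 9, 10, 11, 12, 13>
        // matches with Imm = 6, and <14, 15, 0, 1, 2, 3, 4, 5> matches with the
        // operands swapped (ReverseVEXT) and Imm = 6.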
   4119 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
   4120                        bool &ReverseVEXT, unsigned &Imm) {
   4121   unsigned NumElts = VT.getVectorNumElements();
   4122   ReverseVEXT = false;
   4123 
   4124   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
   4125   if (M[0] < 0)
   4126     return false;
   4127 
   4128   Imm = M[0];
   4129 
   4130   // If this is a VEXT shuffle, the immediate value is the index of the first
   4131   // element.  The other shuffle indices must be the successive elements after
   4132   // the first one.
   4133   unsigned ExpectedElt = Imm;
   4134   for (unsigned i = 1; i < NumElts; ++i) {
   4135     // Increment the expected index.  If it wraps around, it may still be
   4136     // a VEXT but the source vectors must be swapped.
   4137     ExpectedElt += 1;
   4138     if (ExpectedElt == NumElts * 2) {
   4139       ExpectedElt = 0;
   4140       ReverseVEXT = true;
   4141     }
   4142 
   4143     if (M[i] < 0) continue; // ignore UNDEF indices
   4144     if (ExpectedElt != static_cast<unsigned>(M[i]))
   4145       return false;
   4146   }
   4147 
   4148   // Adjust the index value if the source operands will be swapped.
   4149   if (ReverseVEXT)
   4150     Imm -= NumElts;
   4151 
   4152   return true;
   4153 }
   4154 
   4155 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
   4156 /// instruction with the specified blocksize.  (The order of the elements
   4157 /// within each block of the vector is reversed.)
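        /// For example, a VREV64.16 of a v8i16 corresponds to the mask
        /// <3, 2, 1, 0, 7, 6, 5, 4> (BlockSize = 64, EltSz = 16, BlockElts = 4).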
   4158 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
   4159   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
   4160          "Only possible block sizes for VREV are: 16, 32, 64");
   4161 
   4162   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4163   if (EltSz == 64)
   4164     return false;
   4165 
   4166   unsigned NumElts = VT.getVectorNumElements();
   4167   unsigned BlockElts = M[0] + 1;
   4168   // If the first shuffle index is UNDEF, be optimistic.
   4169   if (M[0] < 0)
   4170     BlockElts = BlockSize / EltSz;
   4171 
   4172   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
   4173     return false;
   4174 
   4175   for (unsigned i = 0; i < NumElts; ++i) {
   4176     if (M[i] < 0) continue; // ignore UNDEF indices
   4177     if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
   4178       return false;
   4179   }
   4180 
   4181   return true;
   4182 }
   4183 
   4184 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
   4185   // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
   4186   // range, then 0 is placed into the resulting vector. So pretty much any mask
   4187   // of 8 elements can work here.
   4188   return VT == MVT::v8i8 && M.size() == 8;
   4189 }
   4190 
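        // isVTRNMask - Check for a shuffle mask matching one result of a VTRN
        // (transpose).  For example, for v4i32 operands the first result uses the
        // mask <0, 4, 2, 6> and the second uses <1, 5, 3, 7>.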
   4191 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   4192   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4193   if (EltSz == 64)
   4194     return false;
   4195 
   4196   unsigned NumElts = VT.getVectorNumElements();
   4197   WhichResult = (M[0] == 0 ? 0 : 1);
   4198   for (unsigned i = 0; i < NumElts; i += 2) {
   4199     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
   4200         (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
   4201       return false;
   4202   }
   4203   return true;
   4204 }
   4205 
   4206 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
   4207 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
   4208 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
   4209 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
   4210   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4211   if (EltSz == 64)
   4212     return false;
   4213 
   4214   unsigned NumElts = VT.getVectorNumElements();
   4215   WhichResult = (M[0] == 0 ? 0 : 1);
   4216   for (unsigned i = 0; i < NumElts; i += 2) {
   4217     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
   4218         (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
   4219       return false;
   4220   }
   4221   return true;
   4222 }
   4223 
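        // isVUZPMask - Check for a shuffle mask matching one result of a VUZP
        // (unzip).  For example, for v8i8 operands the first result takes the
        // even-numbered elements, <0, 2, 4, 6, 8, 10, 12, 14>, and the second
        // takes the odd-numbered ones, <1, 3, 5, 7, 9, 11, 13, 15>.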
   4224 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   4225   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4226   if (EltSz == 64)
   4227     return false;
   4228 
   4229   unsigned NumElts = VT.getVectorNumElements();
   4230   WhichResult = (M[0] == 0 ? 0 : 1);
   4231   for (unsigned i = 0; i != NumElts; ++i) {
   4232     if (M[i] < 0) continue; // ignore UNDEF indices
   4233     if ((unsigned) M[i] != 2 * i + WhichResult)
   4234       return false;
   4235   }
   4236 
   4237   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4238   if (VT.is64BitVector() && EltSz == 32)
   4239     return false;
   4240 
   4241   return true;
   4242 }
   4243 
   4244 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
   4245 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
   4246 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
   4247 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
   4248   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4249   if (EltSz == 64)
   4250     return false;
   4251 
   4252   unsigned Half = VT.getVectorNumElements() / 2;
   4253   WhichResult = (M[0] == 0 ? 0 : 1);
   4254   for (unsigned j = 0; j != 2; ++j) {
   4255     unsigned Idx = WhichResult;
   4256     for (unsigned i = 0; i != Half; ++i) {
   4257       int MIdx = M[i + j * Half];
   4258       if (MIdx >= 0 && (unsigned) MIdx != Idx)
   4259         return false;
   4260       Idx += 2;
   4261     }
   4262   }
   4263 
   4264   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4265   if (VT.is64BitVector() && EltSz == 32)
   4266     return false;
   4267 
   4268   return true;
   4269 }
   4270 
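        // isVZIPMask - Check for a shuffle mask matching one result of a VZIP
        // (interleave).  For example, for v8i8 operands the first result is
        // <0, 8, 1, 9, 2, 10, 3, 11> and the second is <4, 12, 5, 13, 6, 14, 7, 15>.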
   4271 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   4272   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4273   if (EltSz == 64)
   4274     return false;
   4275 
   4276   unsigned NumElts = VT.getVectorNumElements();
   4277   WhichResult = (M[0] == 0 ? 0 : 1);
   4278   unsigned Idx = WhichResult * NumElts / 2;
   4279   for (unsigned i = 0; i != NumElts; i += 2) {
   4280     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
   4281         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
   4282       return false;
   4283     Idx += 1;
   4284   }
   4285 
   4286   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4287   if (VT.is64BitVector() && EltSz == 32)
   4288     return false;
   4289 
   4290   return true;
   4291 }
   4292 
   4293 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
   4294 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
   4295 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
   4296 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
   4297   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4298   if (EltSz == 64)
   4299     return false;
   4300 
   4301   unsigned NumElts = VT.getVectorNumElements();
   4302   WhichResult = (M[0] == 0 ? 0 : 1);
   4303   unsigned Idx = WhichResult * NumElts / 2;
   4304   for (unsigned i = 0; i != NumElts; i += 2) {
   4305     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
   4306         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
   4307       return false;
   4308     Idx += 1;
   4309   }
   4310 
   4311   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4312   if (VT.is64BitVector() && EltSz == 32)
   4313     return false;
   4314 
   4315   return true;
   4316 }
   4317 
   4318 /// \return true if this is a reverse operation on a vector.
   4319 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
   4320   unsigned NumElts = VT.getVectorNumElements();
   4321   // Make sure the mask has the right size.
   4322   if (NumElts != M.size())
   4323     return false;
   4324 
   4325   // Look for <15, ..., 3, -1, 1, 0>.
   4326   for (unsigned i = 0; i != NumElts; ++i)
   4327     if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
   4328       return false;
   4329 
   4330   return true;
   4331 }
   4332 
   4333 // If N is an integer constant that can be moved into a register in one
   4334 // instruction, return an SDValue of such a constant (will become a MOV
   4335 // instruction).  Otherwise return null.
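        // For example, in ARM mode 0x0000ab00 is a valid modified immediate and is
        // returned, while a value such as 0x12345678 (and its complement) is not.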
   4336 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
   4337                                      const ARMSubtarget *ST, DebugLoc dl) {
   4338   uint64_t Val;
   4339   if (!isa<ConstantSDNode>(N))
   4340     return SDValue();
   4341   Val = cast<ConstantSDNode>(N)->getZExtValue();
   4342 
   4343   if (ST->isThumb1Only()) {
   4344     if (Val <= 255 || ~Val <= 255)
   4345       return DAG.getConstant(Val, MVT::i32);
   4346   } else {
   4347     if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
   4348       return DAG.getConstant(Val, MVT::i32);
   4349   }
   4350   return SDValue();
   4351 }
   4352 
   4353 // If this is a case we can't handle, return null and let the default
   4354 // expansion code take care of it.
   4355 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   4356                                              const ARMSubtarget *ST) const {
   4357   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
   4358   DebugLoc dl = Op.getDebugLoc();
   4359   EVT VT = Op.getValueType();
   4360 
   4361   APInt SplatBits, SplatUndef;
   4362   unsigned SplatBitSize;
   4363   bool HasAnyUndefs;
   4364   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
   4365     if (SplatBitSize <= 64) {
   4366       // Check if an immediate VMOV works.
   4367       EVT VmovVT;
   4368       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
   4369                                       SplatUndef.getZExtValue(), SplatBitSize,
   4370                                       DAG, VmovVT, VT.is128BitVector(),
   4371                                       VMOVModImm);
   4372       if (Val.getNode()) {
   4373         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
   4374         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   4375       }
   4376 
   4377       // Try an immediate VMVN.
   4378       uint64_t NegatedImm = (~SplatBits).getZExtValue();
   4379       Val = isNEONModifiedImm(NegatedImm,
   4380                                       SplatUndef.getZExtValue(), SplatBitSize,
   4381                                       DAG, VmovVT, VT.is128BitVector(),
   4382                                       VMVNModImm);
   4383       if (Val.getNode()) {
   4384         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
   4385         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   4386       }
   4387 
   4388       // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
   4389       if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
   4390         int ImmVal = ARM_AM::getFP32Imm(SplatBits);
   4391         if (ImmVal != -1) {
   4392           SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
   4393           return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
   4394         }
   4395       }
   4396     }
   4397   }
   4398 
   4399   // Scan through the operands to see if only one value is used.
   4400   //
   4401   // As an optimisation, even if more than one value is used it may be more
   4402   // profitable to splat with one value then change some lanes.
   4403   //
   4404   // Heuristically we decide to do this if the vector has a "dominant" value,
   4405   // defined as splatted to more than half of the lanes.
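          // For example, a non-constant <a, a, a, b> is lowered as a VDUP of 'a'
          // followed by a single lane insert of 'b'.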
   4406   unsigned NumElts = VT.getVectorNumElements();
   4407   bool isOnlyLowElement = true;
   4408   bool usesOnlyOneValue = true;
   4409   bool hasDominantValue = false;
   4410   bool isConstant = true;
   4411 
   4412   // Map of the number of times a particular SDValue appears in the
   4413   // element list.
   4414   DenseMap<SDValue, unsigned> ValueCounts;
   4415   SDValue Value;
   4416   for (unsigned i = 0; i < NumElts; ++i) {
   4417     SDValue V = Op.getOperand(i);
   4418     if (V.getOpcode() == ISD::UNDEF)
   4419       continue;
   4420     if (i > 0)
   4421       isOnlyLowElement = false;
   4422     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
   4423       isConstant = false;
   4424 
   4425     ValueCounts.insert(std::make_pair(V, 0));
   4426     unsigned &Count = ValueCounts[V];
   4427 
   4428     // Is this value dominant? (takes up more than half of the lanes)
   4429     if (++Count > (NumElts / 2)) {
   4430       hasDominantValue = true;
   4431       Value = V;
   4432     }
   4433   }
   4434   if (ValueCounts.size() != 1)
   4435     usesOnlyOneValue = false;
   4436   if (!Value.getNode() && ValueCounts.size() > 0)
   4437     Value = ValueCounts.begin()->first;
   4438 
   4439   if (ValueCounts.size() == 0)
   4440     return DAG.getUNDEF(VT);
   4441 
   4442   if (isOnlyLowElement)
   4443     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
   4444 
   4445   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   4446 
   4447   // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
   4448   // i32 and try again.
   4449   if (hasDominantValue && EltSize <= 32) {
   4450     if (!isConstant) {
   4451       SDValue N;
   4452 
   4453       // If we are VDUPing a value that comes directly from a vector, that will
   4454       // cause an unnecessary move to and from a GPR, where instead we could
   4455       // just use VDUPLANE. We can only do this if the lane being extracted
   4456       // is at a constant index, as the VDUP from lane instructions only have
   4457       // constant-index forms.
   4458       if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   4459           isa<ConstantSDNode>(Value->getOperand(1))) {
   4460         // We need to create a new undef vector to use for the VDUPLANE if the
   4461         // size of the vector from which we get the value is different than the
   4462         // size of the vector that we need to create. We will insert the element
   4463         // such that the register coalescer will remove unnecessary copies.
   4464         if (VT != Value->getOperand(0).getValueType()) {
   4465           ConstantSDNode *constIndex;
   4466           constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
   4467           assert(constIndex && "The index is not a constant!");
   4468           unsigned index = constIndex->getAPIntValue().getLimitedValue() %
   4469                              VT.getVectorNumElements();
   4470           N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
   4471                  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
   4472                         Value, DAG.getConstant(index, MVT::i32)),
   4473                            DAG.getConstant(index, MVT::i32));
   4474         } else
   4475           N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
   4476                         Value->getOperand(0), Value->getOperand(1));
   4477       } else
   4478         N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
   4479 
   4480       if (!usesOnlyOneValue) {
   4481         // The dominant value was splatted as 'N', but we now have to insert
   4482         // all differing elements.
   4483         for (unsigned I = 0; I < NumElts; ++I) {
   4484           if (Op.getOperand(I) == Value)
   4485             continue;
   4486           SmallVector<SDValue, 3> Ops;
   4487           Ops.push_back(N);
   4488           Ops.push_back(Op.getOperand(I));
   4489           Ops.push_back(DAG.getConstant(I, MVT::i32));
   4490           N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
   4491         }
   4492       }
   4493       return N;
   4494     }
   4495     if (VT.getVectorElementType().isFloatingPoint()) {
   4496       SmallVector<SDValue, 8> Ops;
   4497       for (unsigned i = 0; i < NumElts; ++i)
   4498         Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
   4499                                   Op.getOperand(i)));
   4500       EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
   4501       SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
   4502       Val = LowerBUILD_VECTOR(Val, DAG, ST);
   4503       if (Val.getNode())
   4504         return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   4505     }
   4506     if (usesOnlyOneValue) {
   4507       SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
   4508       if (isConstant && Val.getNode())
   4509         return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
   4510     }
   4511   }
   4512 
   4513   // If all elements are constants and the case above didn't get hit, fall back
   4514   // to the default expansion, which will generate a load from the constant
   4515   // pool.
   4516   if (isConstant)
   4517     return SDValue();
   4518 
   4519   // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
   4520   if (NumElts >= 4) {
   4521     SDValue shuffle = ReconstructShuffle(Op, DAG);
   4522     if (shuffle != SDValue())
   4523       return shuffle;
   4524   }
   4525 
   4526   // Vectors with 32- or 64-bit elements can be built by directly assigning
   4527   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
   4528   // will be legalized.
   4529   if (EltSize >= 32) {
   4530     // Do the expansion with floating-point types, since that is what the VFP
   4531     // registers are defined to use, and since i64 is not legal.
   4532     EVT EltVT = EVT::getFloatingPointVT(EltSize);
   4533     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
   4534     SmallVector<SDValue, 8> Ops;
   4535     for (unsigned i = 0; i < NumElts; ++i)
   4536       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
   4537     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
   4538     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   4539   }
   4540 
   4541   return SDValue();
   4542 }
   4543 
   4544 // Gather data to see if the operation can be modelled as a
   4545 // shuffle in combination with VEXTs.
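        // For example, a v4i16 built from elements 2..5 of a single v8i16 source
        // can be rewritten as a VEXT of that source's two halves followed by a
        // vector_shuffle, provided the resulting mask is legal.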
   4546 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
   4547                                               SelectionDAG &DAG) const {
   4548   DebugLoc dl = Op.getDebugLoc();
   4549   EVT VT = Op.getValueType();
   4550   unsigned NumElts = VT.getVectorNumElements();
   4551 
   4552   SmallVector<SDValue, 2> SourceVecs;
   4553   SmallVector<unsigned, 2> MinElts;
   4554   SmallVector<unsigned, 2> MaxElts;
   4555 
   4556   for (unsigned i = 0; i < NumElts; ++i) {
   4557     SDValue V = Op.getOperand(i);
   4558     if (V.getOpcode() == ISD::UNDEF)
   4559       continue;
   4560     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
   4561       // A shuffle can only come from building a vector from various
   4562       // elements of other vectors.
   4563       return SDValue();
   4564     } else if (V.getOperand(0).getValueType().getVectorElementType() !=
   4565                VT.getVectorElementType()) {
   4566       // This code doesn't know how to handle shuffles where the vector
   4567       // element types do not match (this happens because type legalization
   4568       // promotes the return type of EXTRACT_VECTOR_ELT).
   4569       // FIXME: It might be appropriate to extend this code to handle
   4570       // mismatched types.
   4571       return SDValue();
   4572     }
   4573 
   4574     // Record this extraction against the appropriate vector if possible...
   4575     SDValue SourceVec = V.getOperand(0);
   4576     // If the element number isn't a constant, we can't effectively
   4577     // analyze what's going on.
   4578     if (!isa<ConstantSDNode>(V.getOperand(1)))
   4579       return SDValue();
   4580     unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
   4581     bool FoundSource = false;
   4582     for (unsigned j = 0; j < SourceVecs.size(); ++j) {
   4583       if (SourceVecs[j] == SourceVec) {
   4584         if (MinElts[j] > EltNo)
   4585           MinElts[j] = EltNo;
   4586         if (MaxElts[j] < EltNo)
   4587           MaxElts[j] = EltNo;
   4588         FoundSource = true;
   4589         break;
   4590       }
   4591     }
   4592 
   4593     // Or record a new source if not...
   4594     if (!FoundSource) {
   4595       SourceVecs.push_back(SourceVec);
   4596       MinElts.push_back(EltNo);
   4597       MaxElts.push_back(EltNo);
   4598     }
   4599   }
   4600 
   4601   // Currently we only do something sane when at most two source vectors
   4602   // are involved.
   4603   if (SourceVecs.size() > 2)
   4604     return SDValue();
   4605 
   4606   SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
   4607   int VEXTOffsets[2] = {0, 0};
   4608 
   4609   // This loop extracts the usage patterns of the source vectors
   4610   // and prepares appropriate SDValues for a shuffle if possible.
   4611   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
   4612     if (SourceVecs[i].getValueType() == VT) {
   4613       // No VEXT necessary
   4614       ShuffleSrcs[i] = SourceVecs[i];
   4615       VEXTOffsets[i] = 0;
   4616       continue;
   4617     } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
   4618       // It probably isn't worth padding out a smaller vector just to
   4619       // break it down again in a shuffle.
   4620       return SDValue();
   4621     }
   4622 
   4623     // Since only 64-bit and 128-bit vectors are legal on ARM and
   4624     // we've eliminated the other cases...
   4625     assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
   4626            "unexpected vector sizes in ReconstructShuffle");
   4627 
   4628     if (MaxElts[i] - MinElts[i] >= NumElts) {
   4629       // Span too large for a VEXT to cope
   4630       return SDValue();
   4631     }
   4632 
   4633     if (MinElts[i] >= NumElts) {
   4634       // The extraction can just take the second half
   4635       VEXTOffsets[i] = NumElts;
   4636       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4637                                    SourceVecs[i],
   4638                                    DAG.getIntPtrConstant(NumElts));
   4639     } else if (MaxElts[i] < NumElts) {
   4640       // The extraction can just take the first half
   4641       VEXTOffsets[i] = 0;
   4642       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4643                                    SourceVecs[i],
   4644                                    DAG.getIntPtrConstant(0));
   4645     } else {
   4646       // An actual VEXT is needed
   4647       VEXTOffsets[i] = MinElts[i];
   4648       SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4649                                      SourceVecs[i],
   4650                                      DAG.getIntPtrConstant(0));
   4651       SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4652                                      SourceVecs[i],
   4653                                      DAG.getIntPtrConstant(NumElts));
   4654       ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
   4655                                    DAG.getConstant(VEXTOffsets[i], MVT::i32));
   4656     }
   4657   }
   4658 
   4659   SmallVector<int, 8> Mask;
   4660 
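          // Build the mask for the final two-operand shuffle.  Lanes taken from
          // the first source are rebased by its VEXT offset; lanes taken from
          // the second source are rebased by its offset and then shifted up by
          // NumElts so that they index into the second shuffle operand.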
   4661   for (unsigned i = 0; i < NumElts; ++i) {
   4662     SDValue Entry = Op.getOperand(i);
   4663     if (Entry.getOpcode() == ISD::UNDEF) {
   4664       Mask.push_back(-1);
   4665       continue;
   4666     }
   4667 
   4668     SDValue ExtractVec = Entry.getOperand(0);
   4669     int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
   4670                                           .getOperand(1))->getSExtValue();
   4671     if (ExtractVec == SourceVecs[0]) {
   4672       Mask.push_back(ExtractElt - VEXTOffsets[0]);
   4673     } else {
   4674       Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
   4675     }
   4676   }
   4677 
   4678   // Final check before we try to produce nonsense...
   4679   if (isShuffleMaskLegal(Mask, VT))
   4680     return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
   4681                                 &Mask[0]);
   4682 
   4683   return SDValue();
   4684 }
   4685 
   4686 /// isShuffleMaskLegal - Targets can use this to indicate that they only
   4687 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
   4688 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
   4689 /// are assumed to be legal.
   4690 bool
   4691 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
   4692                                       EVT VT) const {
   4693   if (VT.getVectorNumElements() == 4 &&
   4694       (VT.is128BitVector() || VT.is64BitVector())) {
   4695     unsigned PFIndexes[4];
   4696     for (unsigned i = 0; i != 4; ++i) {
   4697       if (M[i] < 0)
   4698         PFIndexes[i] = 8;
   4699       else
   4700         PFIndexes[i] = M[i];
   4701     }
   4702 
   4703     // Compute the index in the perfect shuffle table.
   4704     unsigned PFTableIndex =
   4705       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
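            // Each lane index becomes a base-9 digit, with 8 encoding an undef
            // lane.  For example, the mask <0, 1, u, 3> gives PFIndexes =
            // {0, 1, 8, 3} and PFTableIndex = 0*729 + 1*81 + 8*9 + 3 = 156.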
   4706     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
   4707     unsigned Cost = (PFEntry >> 30);
   4708 
   4709     if (Cost <= 4)
   4710       return true;
   4711   }
   4712 
   4713   bool ReverseVEXT;
   4714   unsigned Imm, WhichResult;
   4715 
   4716   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
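          // Shuffles with 32-bit or wider elements are always legal here: they
          // are expanded to lane extracts plus an ARMISD::BUILD_VECTOR in
          // LowerVECTOR_SHUFFLE.  Narrower shuffles must match one of the NEON
          // permute patterns checked below.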
   4717   return (EltSize >= 32 ||
   4718           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
   4719           isVREVMask(M, VT, 64) ||
   4720           isVREVMask(M, VT, 32) ||
   4721           isVREVMask(M, VT, 16) ||
   4722           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
   4723           isVTBLMask(M, VT) ||
   4724           isVTRNMask(M, VT, WhichResult) ||
   4725           isVUZPMask(M, VT, WhichResult) ||
   4726           isVZIPMask(M, VT, WhichResult) ||
   4727           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
   4728           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
   4729           isVZIP_v_undef_Mask(M, VT, WhichResult) ||
   4730           ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
   4731 }
   4732 
   4733 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
   4734 /// the specified operations to build the shuffle.
   4735 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
   4736                                       SDValue RHS, SelectionDAG &DAG,
   4737                                       DebugLoc dl) {
   4738   unsigned OpNum = (PFEntry >> 26) & 0x0F;
   4739   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
   4740   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
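          // Each perfect-shuffle table entry packs four fields: bits [31:30]
          // hold the cost, bits [29:26] the operation (one of the OP_* values
          // below), and bits [25:13] / [12:0] identify the table entries used
          // to build the left and right inputs.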
   4741 
   4742   enum {
   4743     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
   4744     OP_VREV,
   4745     OP_VDUP0,
   4746     OP_VDUP1,
   4747     OP_VDUP2,
   4748     OP_VDUP3,
   4749     OP_VEXT1,
   4750     OP_VEXT2,
   4751     OP_VEXT3,
   4752     OP_VUZPL, // VUZP, left result
   4753     OP_VUZPR, // VUZP, right result
   4754     OP_VZIPL, // VZIP, left result
   4755     OP_VZIPR, // VZIP, right result
   4756     OP_VTRNL, // VTRN, left result
   4757     OP_VTRNR  // VTRN, right result
   4758   };
   4759 
   4760   if (OpNum == OP_COPY) {
   4761     if (LHSID == (1*9+2)*9+3) return LHS;
   4762     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
   4763     return RHS;
   4764   }
   4765 
   4766   SDValue OpLHS, OpRHS;
   4767   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
   4768   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
   4769   EVT VT = OpLHS.getValueType();
   4770 
   4771   switch (OpNum) {
   4772   default: llvm_unreachable("Unknown shuffle opcode!");
   4773   case OP_VREV:
   4774     // VREV divides the vector in half and swaps within the half.
   4775     if (VT.getVectorElementType() == MVT::i32 ||
   4776         VT.getVectorElementType() == MVT::f32)
   4777       return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
   4778     // vrev <4 x i16> -> VREV32
   4779     if (VT.getVectorElementType() == MVT::i16)
   4780       return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
   4781     // vrev <4 x i8> -> VREV16
   4782     assert(VT.getVectorElementType() == MVT::i8);
   4783     return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
   4784   case OP_VDUP0:
   4785   case OP_VDUP1:
   4786   case OP_VDUP2:
   4787   case OP_VDUP3:
   4788     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
   4789                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
   4790   case OP_VEXT1:
   4791   case OP_VEXT2:
   4792   case OP_VEXT3:
   4793     return DAG.getNode(ARMISD::VEXT, dl, VT,
   4794                        OpLHS, OpRHS,
   4795                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
   4796   case OP_VUZPL:
   4797   case OP_VUZPR:
   4798     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
   4799                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
   4800   case OP_VZIPL:
   4801   case OP_VZIPR:
   4802     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
   4803                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
   4804   case OP_VTRNL:
   4805   case OP_VTRNR:
   4806     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
   4807                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
   4808   }
   4809 }
   4810 
   4811 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
   4812                                        ArrayRef<int> ShuffleMask,
   4813                                        SelectionDAG &DAG) {
   4814   // Check to see if we can use the VTBL instruction.
   4815   SDValue V1 = Op.getOperand(0);
   4816   SDValue V2 = Op.getOperand(1);
   4817   DebugLoc DL = Op.getDebugLoc();
   4818 
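          // VTBL is a byte-wise table lookup: each byte of the index vector
          // selects a byte from the table (V1 for VTBL1, the V1:V2 pair for
          // VTBL2), and out-of-range indices produce zero, which is harmless
          // for undef (-1) mask entries.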
   4819   SmallVector<SDValue, 8> VTBLMask;
   4820   for (ArrayRef<int>::iterator
   4821          I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
   4822     VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
   4823 
   4824   if (V2.getNode()->getOpcode() == ISD::UNDEF)
   4825     return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
   4826                        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
   4827                                    &VTBLMask[0], 8));
   4828 
   4829   return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
   4830                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
   4831                                  &VTBLMask[0], 8));
   4832 }
   4833 
   4834 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
   4835                                                       SelectionDAG &DAG) {
   4836   DebugLoc DL = Op.getDebugLoc();
   4837   SDValue OpLHS = Op.getOperand(0);
   4838   EVT VT = OpLHS.getValueType();
   4839 
   4840   assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
   4841          "Expect a v8i16/v16i8 type");
   4842   OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
   4843   // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
   4844   // extract the first 8 bytes into the top double word and the last 8 bytes
   4845   // into the bottom double word. The v8i16 case is similar.
   4846   unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
   4847   return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
   4848                      DAG.getConstant(ExtractNum, MVT::i32));
   4849 }
   4850 
   4851 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   4852   SDValue V1 = Op.getOperand(0);
   4853   SDValue V2 = Op.getOperand(1);
   4854   DebugLoc dl = Op.getDebugLoc();
   4855   EVT VT = Op.getValueType();
   4856   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
   4857 
   4858   // Convert shuffles that are directly supported on NEON to target-specific
   4859   // DAG nodes, instead of keeping them as shuffles and matching them again
   4860   // during code selection.  This is more efficient and avoids the possibility
   4861   // of inconsistencies between legalization and selection.
   4862   // FIXME: floating-point vectors should be canonicalized to integer vectors
   4863   // of the same size so that they get CSEd properly.
   4864   ArrayRef<int> ShuffleMask = SVN->getMask();
   4865 
   4866   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   4867   if (EltSize <= 32) {
   4868     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
   4869       int Lane = SVN->getSplatIndex();
   4870       // If this is an undef splat, generate it via "just" vdup, if possible.
   4871       if (Lane == -1) Lane = 0;
   4872 
   4873       // Test if V1 is a SCALAR_TO_VECTOR.
   4874       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   4875         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
   4876       }
   4877       // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
   4878       // (and probably will turn into a SCALAR_TO_VECTOR once legalization
   4879       // reaches it).
   4880       if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
   4881           !isa<ConstantSDNode>(V1.getOperand(0))) {
   4882         bool IsScalarToVector = true;
   4883         for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
   4884           if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
   4885             IsScalarToVector = false;
   4886             break;
   4887           }
   4888         if (IsScalarToVector)
   4889           return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
   4890       }
   4891       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
   4892                          DAG.getConstant(Lane, MVT::i32));
   4893     }
   4894 
   4895     bool ReverseVEXT;
   4896     unsigned Imm;
   4897     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
   4898       if (ReverseVEXT)
   4899         std::swap(V1, V2);
   4900       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
   4901                          DAG.getConstant(Imm, MVT::i32));
   4902     }
   4903 
   4904     if (isVREVMask(ShuffleMask, VT, 64))
   4905       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
   4906     if (isVREVMask(ShuffleMask, VT, 32))
   4907       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
   4908     if (isVREVMask(ShuffleMask, VT, 16))
   4909       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
   4910 
   4911     if (V2->getOpcode() == ISD::UNDEF &&
   4912         isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
   4913       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
   4914                          DAG.getConstant(Imm, MVT::i32));
   4915     }
   4916 
   4917     // Check for Neon shuffles that modify both input vectors in place.
   4918     // If both results are used, i.e., if there are two shuffles with the same
   4919     // source operands and with masks corresponding to both results of one of
   4920     // these operations, DAG memoization will ensure that a single node is
   4921     // used for both shuffles.
   4922     unsigned WhichResult;
   4923     if (isVTRNMask(ShuffleMask, VT, WhichResult))
   4924       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
   4925                          V1, V2).getValue(WhichResult);
   4926     if (isVUZPMask(ShuffleMask, VT, WhichResult))
   4927       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
   4928                          V1, V2).getValue(WhichResult);
   4929     if (isVZIPMask(ShuffleMask, VT, WhichResult))
   4930       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
   4931                          V1, V2).getValue(WhichResult);
   4932 
   4933     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
   4934       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
   4935                          V1, V1).getValue(WhichResult);
   4936     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
   4937       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
   4938                          V1, V1).getValue(WhichResult);
   4939     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
   4940       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
   4941                          V1, V1).getValue(WhichResult);
   4942   }
   4943 
   4944   // If the shuffle is not directly supported and it has 4 elements, use
   4945   // the PerfectShuffle-generated table to synthesize it from other shuffles.
   4946   unsigned NumElts = VT.getVectorNumElements();
   4947   if (NumElts == 4) {
   4948     unsigned PFIndexes[4];
   4949     for (unsigned i = 0; i != 4; ++i) {
   4950       if (ShuffleMask[i] < 0)
   4951         PFIndexes[i] = 8;
   4952       else
   4953         PFIndexes[i] = ShuffleMask[i];
   4954     }
   4955 
   4956     // Compute the index in the perfect shuffle table.
   4957     unsigned PFTableIndex =
   4958       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
   4959     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
   4960     unsigned Cost = (PFEntry >> 30);
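            // The top two bits give the cost of the recipe (roughly, how many
            // of the operations in GeneratePerfectShuffle are needed); anything
            // more expensive than 4 is left to the generic expansions below.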
   4961 
   4962     if (Cost <= 4)
   4963       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
   4964   }
   4965 
   4966   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
   4967   if (EltSize >= 32) {
   4968     // Do the expansion with floating-point types, since that is what the VFP
   4969     // registers are defined to use, and since i64 is not legal.
   4970     EVT EltVT = EVT::getFloatingPointVT(EltSize);
   4971     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
   4972     V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
   4973     V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
   4974     SmallVector<SDValue, 8> Ops;
   4975     for (unsigned i = 0; i < NumElts; ++i) {
   4976       if (ShuffleMask[i] < 0)
   4977         Ops.push_back(DAG.getUNDEF(EltVT));
   4978       else
   4979         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
   4980                                   ShuffleMask[i] < (int)NumElts ? V1 : V2,
   4981                                   DAG.getConstant(ShuffleMask[i] & (NumElts-1),
   4982                                                   MVT::i32)));
   4983     }
   4984     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
   4985     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   4986   }
   4987 
   4988   if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
   4989     return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
   4990 
   4991   if (VT == MVT::v8i8) {
   4992     SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
   4993     if (NewOp.getNode())
   4994       return NewOp;
   4995   }
   4996 
   4997   return SDValue();
   4998 }
   4999 
   5000 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   5001   // INSERT_VECTOR_ELT is legal only for immediate indexes.
   5002   SDValue Lane = Op.getOperand(2);
   5003   if (!isa<ConstantSDNode>(Lane))
   5004     return SDValue();
   5005 
   5006   return Op;
   5007 }
   5008 
   5009 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   5010   // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
   5011   SDValue Lane = Op.getOperand(1);
   5012   if (!isa<ConstantSDNode>(Lane))
   5013     return SDValue();
   5014 
   5015   SDValue Vec = Op.getOperand(0);
   5016   if (Op.getValueType() == MVT::i32 &&
   5017       Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
   5018     DebugLoc dl = Op.getDebugLoc();
   5019     return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
   5020   }
   5021 
   5022   return Op;
   5023 }
   5024 
   5025 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   5026   // The only time a CONCAT_VECTORS operation can have legal types is when
   5027   // two 64-bit vectors are concatenated to a 128-bit vector.
   5028   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
   5029          "unexpected CONCAT_VECTORS");
   5030   DebugLoc dl = Op.getDebugLoc();
   5031   SDValue Val = DAG.getUNDEF(MVT::v2f64);
   5032   SDValue Op0 = Op.getOperand(0);
   5033   SDValue Op1 = Op.getOperand(1);
   5034   if (Op0.getOpcode() != ISD::UNDEF)
   5035     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
   5036                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
   5037                       DAG.getIntPtrConstant(0));
   5038   if (Op1.getOpcode() != ISD::UNDEF)
   5039     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
   5040                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
   5041                       DAG.getIntPtrConstant(1));
   5042   return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
   5043 }
   5044 
   5045 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
   5046 /// element has been zero/sign-extended, depending on the isSigned parameter,
   5047 /// from an integer type half its size.
   5048 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
   5049                                    bool isSigned) {
   5050   // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
   5051   EVT VT = N->getValueType(0);
   5052   if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
   5053     SDNode *BVN = N->getOperand(0).getNode();
   5054     if (BVN->getValueType(0) != MVT::v4i32 ||
   5055         BVN->getOpcode() != ISD::BUILD_VECTOR)
   5056       return false;
   5057     unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
   5058     unsigned HiElt = 1 - LoElt;
   5059     ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
   5060     ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
   5061     ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
   5062     ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
   5063     if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
   5064       return false;
   5065     if (isSigned) {
   5066       if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
   5067           Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
   5068         return true;
   5069     } else {
   5070       if (Hi0->isNullValue() && Hi1->isNullValue())
   5071         return true;
   5072     }
   5073     return false;
   5074   }
   5075 
   5076   if (N->getOpcode() != ISD::BUILD_VECTOR)
   5077     return false;
   5078 
   5079   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   5080     SDNode *Elt = N->getOperand(i).getNode();
   5081     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
   5082       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   5083       unsigned HalfSize = EltSize / 2;
   5084       if (isSigned) {
   5085         if (!isIntN(HalfSize, C->getSExtValue()))
   5086           return false;
   5087       } else {
   5088         if (!isUIntN(HalfSize, C->getZExtValue()))
   5089           return false;
   5090       }
   5091       continue;
   5092     }
   5093     return false;
   5094   }
   5095 
   5096   return true;
   5097 }
   5098 
   5099 /// isSignExtended - Check if a node is a vector value that is sign-extended
   5100 /// or a constant BUILD_VECTOR with sign-extended elements.
   5101 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
   5102   if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
   5103     return true;
   5104   if (isExtendedBUILD_VECTOR(N, DAG, true))
   5105     return true;
   5106   return false;
   5107 }
   5108 
   5109 /// isZeroExtended - Check if a node is a vector value that is zero-extended
   5110 /// or a constant BUILD_VECTOR with zero-extended elements.
   5111 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
   5112   if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
   5113     return true;
   5114   if (isExtendedBUILD_VECTOR(N, DAG, false))
   5115     return true;
   5116   return false;
   5117 }
   5118 
   5119 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
   5120 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
   5121 /// We insert the required extension here to get the vector to fill a D register.
   5122 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
   5123                                             const EVT &OrigTy,
   5124                                             const EVT &ExtTy,
   5125                                             unsigned ExtOpcode) {
   5126   // The vector originally had a size of OrigTy. It was then extended to ExtTy.
   5127   // We expect ExtTy to be 128 bits total. If OrigTy is less than
   5128   // 64 bits, we need to insert a new extension so that it will be 64 bits.
   5129   assert(ExtTy.is128BitVector() && "Unexpected extension size");
   5130   if (OrigTy.getSizeInBits() >= 64)
   5131     return N;
   5132 
   5133   // Must extend size to at least 64 bits to be used as an operand for VMULL.
   5134   MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
   5135   EVT NewVT;
   5136   switch (OrigSimpleTy) {
   5137   default: llvm_unreachable("Unexpected Orig Vector Type");
   5138   case MVT::v2i8:
   5139   case MVT::v2i16:
   5140     NewVT = MVT::v2i32;
   5141     break;
   5142   case MVT::v4i8:
   5143     NewVT = MVT::v4i16;
   5144     break;
   5145   }
   5146   return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
   5147 }
   5148 
   5149 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
   5150 /// does not do any sign/zero extension. If the original vector is less
   5151 /// than 64 bits, an appropriate extension will be added after the load to
   5152 /// reach a total size of 64 bits. We have to add the extension separately
   5153 /// because ARM does not have a sign/zero extending load for vectors.
   5154 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
   5155   SDValue NonExtendingLoad =
   5156     DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
   5157                 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
   5158                 LD->isNonTemporal(), LD->isInvariant(),
   5159                 LD->getAlignment());
   5160   unsigned ExtOp = 0;
   5161   switch (LD->getExtensionType()) {
   5162   default: llvm_unreachable("Unexpected LoadExtType");
   5163   case ISD::EXTLOAD:
   5164   case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
   5165   case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
   5166   }
   5167   MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
   5168   MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
   5169   return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
   5170                                       MemType, ExtType, ExtOp);
   5171 }
   5172 
   5173 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
   5174 /// extending load, or BUILD_VECTOR with extended elements, return the
   5175 /// unextended value. The unextended vector should be 64 bits so that it can
   5176 /// be used as an operand to a VMULL instruction. If the original vector size
   5177 /// before extension is less than 64 bits, we add an extension to resize
   5178 /// the vector to 64 bits.
   5179 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
   5180   if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
   5181     return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
   5182                                         N->getOperand(0)->getValueType(0),
   5183                                         N->getValueType(0),
   5184                                         N->getOpcode());
   5185 
   5186   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
   5187     return SkipLoadExtensionForVMULL(LD, DAG);
   5188 
   5189   // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
   5190   // have been legalized as a BITCAST from v4i32.
   5191   if (N->getOpcode() == ISD::BITCAST) {
   5192     SDNode *BVN = N->getOperand(0).getNode();
   5193     assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
   5194            BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
   5195     unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
   5196     return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
   5197                        BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
   5198   }
   5199   // Construct a new BUILD_VECTOR with elements truncated to half the size.
   5200   assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
   5201   EVT VT = N->getValueType(0);
   5202   unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
   5203   unsigned NumElts = VT.getVectorNumElements();
   5204   MVT TruncVT = MVT::getIntegerVT(EltSize);
   5205   SmallVector<SDValue, 8> Ops;
   5206   for (unsigned i = 0; i != NumElts; ++i) {
   5207     ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
   5208     const APInt &CInt = C->getAPIntValue();
   5209     // Element types smaller than 32 bits are not legal, so use i32 elements.
   5210     // The values are implicitly truncated so sext vs. zext doesn't matter.
   5211     Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
   5212   }
   5213   return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
   5214                      MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
   5215 }
   5216 
   5217 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
   5218   unsigned Opcode = N->getOpcode();
   5219   if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
   5220     SDNode *N0 = N->getOperand(0).getNode();
   5221     SDNode *N1 = N->getOperand(1).getNode();
   5222     return N0->hasOneUse() && N1->hasOneUse() &&
   5223       isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
   5224   }
   5225   return false;
   5226 }
   5227 
   5228 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
   5229   unsigned Opcode = N->getOpcode();
   5230   if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
   5231     SDNode *N0 = N->getOperand(0).getNode();
   5232     SDNode *N1 = N->getOperand(1).getNode();
   5233     return N0->hasOneUse() && N1->hasOneUse() &&
   5234       isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
   5235   }
   5236   return false;
   5237 }
   5238 
   5239 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   5240   // Multiplications are only custom-lowered for 128-bit vectors so that
   5241   // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
   5242   EVT VT = Op.getValueType();
   5243   assert(VT.is128BitVector() && VT.isInteger() &&
   5244          "unexpected type for custom-lowering ISD::MUL");
   5245   SDNode *N0 = Op.getOperand(0).getNode();
   5246   SDNode *N1 = Op.getOperand(1).getNode();
   5247   unsigned NewOpc = 0;
   5248   bool isMLA = false;
   5249   bool isN0SExt = isSignExtended(N0, DAG);
   5250   bool isN1SExt = isSignExtended(N1, DAG);
   5251   if (isN0SExt && isN1SExt)
   5252     NewOpc = ARMISD::VMULLs;
   5253   else {
   5254     bool isN0ZExt = isZeroExtended(N0, DAG);
   5255     bool isN1ZExt = isZeroExtended(N1, DAG);
   5256     if (isN0ZExt && isN1ZExt)
   5257       NewOpc = ARMISD::VMULLu;
   5258     else if (isN1SExt || isN1ZExt) {
   5259       // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
   5260       // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
   5261       if (isN1SExt && isAddSubSExt(N0, DAG)) {
   5262         NewOpc = ARMISD::VMULLs;
   5263         isMLA = true;
   5264       } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
   5265         NewOpc = ARMISD::VMULLu;
   5266         isMLA = true;
   5267       } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
   5268         std::swap(N0, N1);
   5269         NewOpc = ARMISD::VMULLu;
   5270         isMLA = true;
   5271       }
   5272     }
   5273 
   5274     if (!NewOpc) {
   5275       if (VT == MVT::v2i64)
   5276         // Fall through to expand this.  It is not legal.
   5277         return SDValue();
   5278       else
   5279         // Other vector multiplications are legal.
   5280         return Op;
   5281     }
   5282   }
   5283 
   5284   // Legalize to a VMULL instruction.
   5285   DebugLoc DL = Op.getDebugLoc();
   5286   SDValue Op0;
   5287   SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
   5288   if (!isMLA) {
   5289     Op0 = SkipExtensionForVMULL(N0, DAG);
   5290     assert(Op0.getValueType().is64BitVector() &&
   5291            Op1.getValueType().is64BitVector() &&
   5292            "unexpected types for extended operands to VMULL");
   5293     return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
   5294   }
   5295 
   5296   // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
   5297   // isel lowering to take advantage of no-stall back to back vmul + vmla.
   5298   //   vmull q0, d4, d6
   5299   //   vmlal q0, d5, d6
   5300   // is faster than
   5301   //   vaddl q0, d4, d5
   5302   //   vmovl q1, d6
   5303   //   vmul  q0, q0, q1
   5304   SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
   5305   SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
   5306   EVT Op1VT = Op1.getValueType();
   5307   return DAG.getNode(N0->getOpcode(), DL, VT,
   5308                      DAG.getNode(NewOpc, DL, VT,
   5309                                DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
   5310                      DAG.getNode(NewOpc, DL, VT,
   5311                                DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
   5312 }
   5313 
   5314 static SDValue
   5315 LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
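          // The operands are v4i16 values whose elements are known to fit in
          // i8 (LowerSDIV sign-extends v8i8 inputs before splitting them), so
          // the reciprocal estimate below is exact for the whole input range
          // without any Newton refinement step.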
   5316   // Convert to float
   5317   // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
   5318   // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
   5319   X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
   5320   Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
   5321   X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
   5322   Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
   5323   // Get reciprocal estimate.
   5324   // float4 recip = vrecpeq_f32(yf);
   5325   Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5326                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
   5327   // Because char has a smaller range than uchar, we can actually get away
   5328   // without any Newton steps.  This requires that we use a weird bias
   5329   // of 0xb000, however (again, this has been exhaustively tested).
   5330   // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
   5331   X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
   5332   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
   5333   Y = DAG.getConstant(0xb000, MVT::i32);
   5334   Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
   5335   X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
   5336   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
   5337   // Convert back to short.
   5338   X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
   5339   X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
   5340   return X;
   5341 }
   5342 
   5343 static SDValue
   5344 LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
   5345   SDValue N2;
   5346   // Convert to float.
   5347   // float4 yf = vcvt_f32_s32(vmovl_s16(y));
   5348   // float4 xf = vcvt_f32_s32(vmovl_s16(x));
   5349   N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
   5350   N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
   5351   N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
   5352   N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
   5353 
   5354   // Use reciprocal estimate and one refinement step.
   5355   // float4 recip = vrecpeq_f32(yf);
   5356   // recip *= vrecpsq_f32(yf, recip);
   5357   N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5358                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
   5359   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5360                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
   5361                    N1, N2);
   5362   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
   5363   // Because short has a smaller range than ushort, we can actually get away
   5364   // with only a single Newton step.  This requires that we use a weird bias
   5365   // of 0x89, however (again, this has been exhaustively tested).
   5366   // float4 result = as_float4(as_int4(xf*recip) + 0x89);
   5367   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
   5368   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
   5369   N1 = DAG.getConstant(0x89, MVT::i32);
   5370   N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
   5371   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
   5372   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
   5373   // Convert back to integer and return.
   5374   // return vmovn_s32(vcvt_s32_f32(result));
   5375   N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
   5376   N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
   5377   return N0;
   5378 }
   5379 
   5380 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
   5381   EVT VT = Op.getValueType();
   5382   assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
   5383          "unexpected type for custom-lowering ISD::SDIV");
   5384 
   5385   DebugLoc dl = Op.getDebugLoc();
   5386   SDValue N0 = Op.getOperand(0);
   5387   SDValue N1 = Op.getOperand(1);
   5388   SDValue N2, N3;
   5389 
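          // v8i8 is handled by sign-extending to v8i16, splitting into low and
          // high v4i16 halves, dividing each half with the float reciprocal
          // sequence, and truncating the concatenated result back to v8i8.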
   5390   if (VT == MVT::v8i8) {
   5391     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
   5392     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
   5393 
   5394     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5395                      DAG.getIntPtrConstant(4));
   5396     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5397                      DAG.getIntPtrConstant(4));
   5398     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5399                      DAG.getIntPtrConstant(0));
   5400     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5401                      DAG.getIntPtrConstant(0));
   5402 
   5403     N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
   5404     N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
   5405 
   5406     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
   5407     N0 = LowerCONCAT_VECTORS(N0, DAG);
   5408 
   5409     N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
   5410     return N0;
   5411   }
   5412   return LowerSDIV_v4i16(N0, N1, dl, DAG);
   5413 }
   5414 
   5415 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
   5416   EVT VT = Op.getValueType();
   5417   assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
   5418          "unexpected type for custom-lowering ISD::UDIV");
   5419 
   5420   DebugLoc dl = Op.getDebugLoc();
   5421   SDValue N0 = Op.getOperand(0);
   5422   SDValue N1 = Op.getOperand(1);
   5423   SDValue N2, N3;
   5424 
   5425   if (VT == MVT::v8i8) {
   5426     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
   5427     N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
   5428 
   5429     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5430                      DAG.getIntPtrConstant(4));
   5431     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5432                      DAG.getIntPtrConstant(4));
   5433     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5434                      DAG.getIntPtrConstant(0));
   5435     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5436                      DAG.getIntPtrConstant(0));
   5437 
   5438     N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
   5439     N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
   5440 
   5441     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
   5442     N0 = LowerCONCAT_VECTORS(N0, DAG);
   5443 
   5444     N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
   5445                      DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
   5446                      N0);
   5447     return N0;
   5448   }
   5449 
   5450   // v4i16 udiv ... Convert to float.
   5451   // float4 yf = vcvt_f32_s32(vmovl_u16(y));
   5452   // float4 xf = vcvt_f32_s32(vmovl_u16(x));
   5453   N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
   5454   N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
   5455   N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
   5456   SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
   5457 
   5458   // Use reciprocal estimate and two refinement steps.
   5459   // float4 recip = vrecpeq_f32(yf);
   5460   // recip *= vrecpsq_f32(yf, recip);
   5461   // recip *= vrecpsq_f32(yf, recip);
   5462   N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5463                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
   5464   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5465                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
   5466                    BN1, N2);
   5467   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
   5468   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5469                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
   5470                    BN1, N2);
   5471   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
   5472   // Simply multiplying by the reciprocal estimate can leave us a few ulps
   5473   // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
   5474   // and that it will never cause us to return an answer too large).
   5475   // float4 result = as_float4(as_int4(xf*recip) + 2);
   5476   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
   5477   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
   5478   N1 = DAG.getConstant(2, MVT::i32);
   5479   N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
   5480   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
   5481   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
   5482   // Convert back to integer and return.
   5483   // return vmovn_u32(vcvt_s32_f32(result));
   5484   N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
   5485   N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
   5486   return N0;
   5487 }
   5488 
   5489 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
   5490   EVT VT = Op.getNode()->getValueType(0);
   5491   SDVTList VTs = DAG.getVTList(VT, MVT::i32);
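          // Each of these nodes produces two results: the arithmetic value and
          // an i32 carry/borrow value that a following ADDE/SUBE consumes as
          // its extra operand.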
   5492 
   5493   unsigned Opc;
   5494   bool ExtraOp = false;
   5495   switch (Op.getOpcode()) {
   5496   default: llvm_unreachable("Invalid code");
   5497   case ISD::ADDC: Opc = ARMISD::ADDC; break;
   5498   case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
   5499   case ISD::SUBC: Opc = ARMISD::SUBC; break;
   5500   case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
   5501   }
   5502 
   5503   if (!ExtraOp)
   5504     return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
   5505                        Op.getOperand(1));
   5506   return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
   5507                      Op.getOperand(1), Op.getOperand(2));
   5508 }
   5509 
   5510 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
   5511   // Monotonic load/store is legal for all targets
   5512   if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
   5513     return Op;
   5514 
   5515   // Acquire/Release load/store is not legal for targets without a
   5516   // dmb or equivalent available.
   5517   return SDValue();
   5518 }
   5519 
   5520 
   5521 static void
   5522 ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
   5523                     SelectionDAG &DAG, unsigned NewOp) {
   5524   DebugLoc dl = Node->getDebugLoc();
   5525   assert (Node->getValueType(0) == MVT::i64 &&
   5526           "Only know how to expand i64 atomics");
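          // Split each 64-bit value operand into lo/hi i32 halves and emit a
          // target-specific memory intrinsic node; that node is later expanded
          // into a 64-bit exclusive load/store (ldrexd/strexd) loop.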
   5527 
   5528   SmallVector<SDValue, 6> Ops;
   5529   Ops.push_back(Node->getOperand(0)); // Chain
   5530   Ops.push_back(Node->getOperand(1)); // Ptr
   5531   // Low part of Val1
   5532   Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5533                             Node->getOperand(2), DAG.getIntPtrConstant(0)));
   5534   // High part of Val1
   5535   Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5536                             Node->getOperand(2), DAG.getIntPtrConstant(1)));
   5537   if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
   5538     // Low part of Val2
   5539     Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5540                               Node->getOperand(3), DAG.getIntPtrConstant(0)));
   5541     // High part of Val2
   5542     Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5543                               Node->getOperand(3), DAG.getIntPtrConstant(1)));
   5544   }
   5545   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
   5546   SDValue Result =
   5547     DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
   5548                             cast<MemSDNode>(Node)->getMemOperand());
   5549   SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
   5550   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
   5551   Results.push_back(Result.getValue(2));
   5552 }
   5553 
   5554 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   5555   switch (Op.getOpcode()) {
   5556   default: llvm_unreachable("Don't know how to custom lower this!");
   5557   case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
   5558   case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
   5559   case ISD::GlobalAddress:
   5560     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
   5561       LowerGlobalAddressELF(Op, DAG);
   5562   case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
   5563   case ISD::SELECT:        return LowerSELECT(Op, DAG);
   5564   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
   5565   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
   5566   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   5567   case ISD::VASTART:       return LowerVASTART(Op, DAG);
   5568   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
   5569   case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
   5570   case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
   5571   case ISD::SINT_TO_FP:
   5572   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
   5573   case ISD::FP_TO_SINT:
   5574   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
   5575   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
   5576   case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
   5577   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
   5578   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   5579   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
   5580   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
   5581   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
   5582                                                                Subtarget);
   5583   case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
   5584   case ISD::SHL:
   5585   case ISD::SRL:
   5586   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
   5587   case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
   5588   case ISD::SRL_PARTS:
   5589   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
   5590   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
   5591   case ISD::CTPOP:         return LowerCTPOP(Op.getNode(), DAG, Subtarget);
   5592   case ISD::SETCC:         return LowerVSETCC(Op, DAG);
   5593   case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
   5594   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
   5595   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
   5596   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
   5597   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   5598   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
   5599   case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
   5600   case ISD::MUL:           return LowerMUL(Op, DAG);
   5601   case ISD::SDIV:          return LowerSDIV(Op, DAG);
   5602   case ISD::UDIV:          return LowerUDIV(Op, DAG);
   5603   case ISD::ADDC:
   5604   case ISD::ADDE:
   5605   case ISD::SUBC:
   5606   case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   5607   case ISD::ATOMIC_LOAD:
   5608   case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
   5609   }
   5610 }
   5611 
   5612 /// ReplaceNodeResults - Replace the results of a node with an illegal result
   5613 /// type with new values built out of custom code.
   5614 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   5615                                            SmallVectorImpl<SDValue>&Results,
   5616                                            SelectionDAG &DAG) const {
   5617   SDValue Res;
   5618   switch (N->getOpcode()) {
   5619   default:
   5620     llvm_unreachable("Don't know how to custom expand this!");
   5621   case ISD::BITCAST:
   5622     Res = ExpandBITCAST(N, DAG);
   5623     break;
   5624   case ISD::SRL:
   5625   case ISD::SRA:
   5626     Res = Expand64BitShift(N, DAG, Subtarget);
   5627     break;
   5628   case ISD::ATOMIC_LOAD_ADD:
   5629     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
   5630     return;
   5631   case ISD::ATOMIC_LOAD_AND:
   5632     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
   5633     return;
   5634   case ISD::ATOMIC_LOAD_NAND:
   5635     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
   5636     return;
   5637   case ISD::ATOMIC_LOAD_OR:
   5638     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
   5639     return;
   5640   case ISD::ATOMIC_LOAD_SUB:
   5641     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
   5642     return;
   5643   case ISD::ATOMIC_LOAD_XOR:
   5644     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
   5645     return;
   5646   case ISD::ATOMIC_SWAP:
   5647     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
   5648     return;
   5649   case ISD::ATOMIC_CMP_SWAP:
   5650     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
   5651     return;
   5652   case ISD::ATOMIC_LOAD_MIN:
   5653     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
   5654     return;
   5655   case ISD::ATOMIC_LOAD_UMIN:
   5656     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
   5657     return;
   5658   case ISD::ATOMIC_LOAD_MAX:
   5659     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
   5660     return;
   5661   case ISD::ATOMIC_LOAD_UMAX:
   5662     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
   5663     return;
   5664   }
   5665   if (Res.getNode())
   5666     Results.push_back(Res);
   5667 }
   5668 
   5669 //===----------------------------------------------------------------------===//
   5670 //                           ARM Scheduler Hooks
   5671 //===----------------------------------------------------------------------===//
   5672 
   5673 MachineBasicBlock *
   5674 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
   5675                                      MachineBasicBlock *BB,
   5676                                      unsigned Size) const {
   5677   unsigned dest    = MI->getOperand(0).getReg();
   5678   unsigned ptr     = MI->getOperand(1).getReg();
   5679   unsigned oldval  = MI->getOperand(2).getReg();
   5680   unsigned newval  = MI->getOperand(3).getReg();
   5681   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   5682   DebugLoc dl = MI->getDebugLoc();
   5683   bool isThumb2 = Subtarget->isThumb2();
   5684 
   5685   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   5686   unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
   5687     (const TargetRegisterClass*)&ARM::rGPRRegClass :
   5688     (const TargetRegisterClass*)&ARM::GPRRegClass);
   5689 
   5690   if (isThumb2) {
   5691     MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
   5692     MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
   5693     MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
   5694   }
   5695 
   5696   unsigned ldrOpc, strOpc;
   5697   switch (Size) {
   5698   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
   5699   case 1:
   5700     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
   5701     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
   5702     break;
   5703   case 2:
   5704     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
   5705     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
   5706     break;
   5707   case 4:
   5708     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
   5709     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
   5710     break;
   5711   }
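          // LDREX/STREX (and their byte/halfword forms) use the exclusive
          // monitor: the store-exclusive only succeeds if the monitor set by
          // the load-exclusive is still held, and it reports success in the
          // scratch register, so the loops below retry until it does.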
   5712 
   5713   MachineFunction *MF = BB->getParent();
   5714   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   5715   MachineFunction::iterator It = BB;
   5716   ++It; // insert the new blocks after the current block
   5717 
   5718   MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5719   MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5720   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5721   MF->insert(It, loop1MBB);
   5722   MF->insert(It, loop2MBB);
   5723   MF->insert(It, exitMBB);
   5724 
   5725   // Transfer the remainder of BB and its successor edges to exitMBB.
   5726   exitMBB->splice(exitMBB->begin(), BB,
   5727                   llvm::next(MachineBasicBlock::iterator(MI)),
   5728                   BB->end());
   5729   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   5730 
   5731   //  thisMBB:
   5732   //   ...
   5733   //   fallthrough --> loop1MBB
   5734   BB->addSuccessor(loop1MBB);
   5735 
   5736   // loop1MBB:
   5737   //   ldrex dest, [ptr]
   5738   //   cmp dest, oldval
   5739   //   bne exitMBB
   5740   BB = loop1MBB;
   5741   MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
   5742   if (ldrOpc == ARM::t2LDREX)
   5743     MIB.addImm(0);
   5744   AddDefaultPred(MIB);
   5745   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
   5746                  .addReg(dest).addReg(oldval));
   5747   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   5748     .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   5749   BB->addSuccessor(loop2MBB);
   5750   BB->addSuccessor(exitMBB);
   5751 
   5752   // loop2MBB:
   5753   //   strex scratch, newval, [ptr]
   5754   //   cmp scratch, #0
   5755   //   bne loop1MBB
   5756   BB = loop2MBB;
   5757   MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
   5758   if (strOpc == ARM::t2STREX)
   5759     MIB.addImm(0);
   5760   AddDefaultPred(MIB);
   5761   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   5762                  .addReg(scratch).addImm(0));
   5763   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   5764     .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   5765   BB->addSuccessor(loop1MBB);
   5766   BB->addSuccessor(exitMBB);
   5767 
   5768   //  exitMBB:
   5769   //   ...
   5770   BB = exitMBB;
   5771 
   5772   MI->eraseFromParent();   // The instruction is gone now.
   5773 
   5774   return BB;
   5775 }
   5776 
   5777 MachineBasicBlock *
   5778 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   5779                                     unsigned Size, unsigned BinOpcode) const {
   5780   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   5781   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   5782 
   5783   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   5784   MachineFunction *MF = BB->getParent();
   5785   MachineFunction::iterator It = BB;
   5786   ++It;
   5787 
   5788   unsigned dest = MI->getOperand(0).getReg();
   5789   unsigned ptr = MI->getOperand(1).getReg();
   5790   unsigned incr = MI->getOperand(2).getReg();
   5791   DebugLoc dl = MI->getDebugLoc();
   5792   bool isThumb2 = Subtarget->isThumb2();
   5793 
   5794   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   5795   if (isThumb2) {
   5796     MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
   5797     MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
   5798   }
   5799 
   5800   unsigned ldrOpc, strOpc;
   5801   switch (Size) {
   5802   default: llvm_unreachable("unsupported size for AtomicBinary!");
   5803   case 1:
   5804     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
   5805     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
   5806     break;
   5807   case 2:
   5808     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
   5809     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
   5810     break;
   5811   case 4:
   5812     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
   5813     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
   5814     break;
   5815   }
   5816 
   5817   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5818   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5819   MF->insert(It, loopMBB);
   5820   MF->insert(It, exitMBB);
   5821 
   5822   // Transfer the remainder of BB and its successor edges to exitMBB.
   5823   exitMBB->splice(exitMBB->begin(), BB,
   5824                   llvm::next(MachineBasicBlock::iterator(MI)),
   5825                   BB->end());
   5826   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   5827 
   5828   const TargetRegisterClass *TRC = isThumb2 ?
   5829     (const TargetRegisterClass*)&ARM::rGPRRegClass :
   5830     (const TargetRegisterClass*)&ARM::GPRRegClass;
   5831   unsigned scratch = MRI.createVirtualRegister(TRC);
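          // For ATOMIC_SWAP (BinOpcode == 0) the value to store is simply
          // 'incr', so reuse it; otherwise the binop result needs its own
          // scratch register.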
   5832   unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
   5833 
   5834   //  thisMBB:
   5835   //   ...
   5836   //   fallthrough --> loopMBB
   5837   BB->addSuccessor(loopMBB);
   5838 
   5839   //  loopMBB:
   5840   //   ldrex dest, ptr
   5841   //   <binop> scratch2, dest, incr
   5842   //   strex scratch, scratch2, ptr
   5843   //   cmp scratch, #0
   5844   //   bne- loopMBB
   5845   //   fallthrough --> exitMBB
   5846   BB = loopMBB;
   5847   MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
   5848   if (ldrOpc == ARM::t2LDREX)
   5849     MIB.addImm(0);
   5850   AddDefaultPred(MIB);
   5851   if (BinOpcode) {
   5852     // operand order needs to go the other way for NAND
   5853     if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
   5854       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
   5855                      addReg(incr).addReg(dest)).addReg(0);
   5856     else
   5857       AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
   5858                      addReg(dest).addReg(incr)).addReg(0);
   5859   }
   5860 
   5861   MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
   5862   if (strOpc == ARM::t2STREX)
   5863     MIB.addImm(0);
   5864   AddDefaultPred(MIB);
   5865   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   5866                  .addReg(scratch).addImm(0));
   5867   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   5868     .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   5869 
   5870   BB->addSuccessor(loopMBB);
   5871   BB->addSuccessor(exitMBB);
   5872 
   5873   //  exitMBB:
   5874   //   ...
   5875   BB = exitMBB;
   5876 
   5877   MI->eraseFromParent();   // The instruction is gone now.
   5878 
   5879   return BB;
   5880 }
   5881 
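        // EmitAtomicBinaryMinMax expands the signed/unsigned atomic min/max
        // pseudos.  Instead of a binary op, the loop compares the loaded value
        // against the operand and uses a conditional move selected by 'Cond'
        // to pick the value that gets stored back; sub-word loads are
        // sign-extended first when 'signExtend' is set.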
   5882 MachineBasicBlock *
   5883 ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
   5884                                           MachineBasicBlock *BB,
   5885                                           unsigned Size,
   5886                                           bool signExtend,
   5887                                           ARMCC::CondCodes Cond) const {
   5888   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   5889 
   5890   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   5891   MachineFunction *MF = BB->getParent();
   5892   MachineFunction::iterator It = BB;
   5893   ++It;
   5894 
   5895   unsigned dest = MI->getOperand(0).getReg();
   5896   unsigned ptr = MI->getOperand(1).getReg();
   5897   unsigned incr = MI->getOperand(2).getReg();
   5898   unsigned oldval = dest;
   5899   DebugLoc dl = MI->getDebugLoc();
   5900   bool isThumb2 = Subtarget->isThumb2();
   5901 
   5902   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   5903   if (isThumb2) {
   5904     MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
   5905     MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
   5906   }
   5907 
   5908   unsigned ldrOpc, strOpc, extendOpc;
   5909   switch (Size) {
   5910   default: llvm_unreachable("unsupported size for atomic min/max op!");
   5911   case 1:
   5912     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
   5913     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
   5914     extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB;
   5915     break;
   5916   case 2:
   5917     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
   5918     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
   5919     extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH;
   5920     break;
   5921   case 4:
   5922     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
   5923     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
   5924     extendOpc = 0;
   5925     break;
   5926   }
   5927 
   5928   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5929   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5930   MF->insert(It, loopMBB);
   5931   MF->insert(It, exitMBB);
   5932 
   5933   // Transfer the remainder of BB and its successor edges to exitMBB.
   5934   exitMBB->splice(exitMBB->begin(), BB,
   5935                   llvm::next(MachineBasicBlock::iterator(MI)),
   5936                   BB->end());
   5937   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   5938 
   5939   const TargetRegisterClass *TRC = isThumb2 ?
   5940     (const TargetRegisterClass*)&ARM::rGPRRegClass :
   5941     (const TargetRegisterClass*)&ARM::GPRRegClass;
   5942   unsigned scratch = MRI.createVirtualRegister(TRC);
   5943   unsigned scratch2 = MRI.createVirtualRegister(TRC);
   5944 
   5945   //  thisMBB:
   5946   //   ...
   5947   //   fallthrough --> loopMBB
   5948   BB->addSuccessor(loopMBB);
   5949 
   5950   //  loopMBB:
   5951   //   ldrex dest, ptr
   5952   //   (sign extend dest, if required)
   5953   //   cmp dest, incr
   5954   //   cmov.cond scratch2, incr, dest
   5955   //   strex scratch, scratch2, ptr
   5956   //   cmp scratch, #0
   5957   //   bne- loopMBB
   5958   //   fallthrough --> exitMBB
   5959   BB = loopMBB;
   5960   MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
   5961   if (ldrOpc == ARM::t2LDREX)
   5962     MIB.addImm(0);
   5963   AddDefaultPred(MIB);
   5964 
   5965   // Sign extend the value, if necessary.
   5966   if (signExtend && extendOpc) {
   5967     oldval = MRI.createVirtualRegister(&ARM::GPRRegClass);
   5968     AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval)
   5969                      .addReg(dest)
   5970                      .addImm(0));
   5971   }
   5972 
   5973   // Build compare and cmov instructions.
   5974   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
   5975                  .addReg(oldval).addReg(incr));
   5976   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2)
   5977          .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR);
   5978 
   5979   MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
   5980   if (strOpc == ARM::t2STREX)
   5981     MIB.addImm(0);
   5982   AddDefaultPred(MIB);
   5983   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   5984                  .addReg(scratch).addImm(0));
   5985   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   5986     .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   5987 
   5988   BB->addSuccessor(loopMBB);
   5989   BB->addSuccessor(exitMBB);
   5990 
   5991   //  exitMBB:
   5992   //   ...
   5993   BB = exitMBB;
   5994 
   5995   MI->eraseFromParent();   // The instruction is gone now.
   5996 
   5997   return BB;
   5998 }
   5999 
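        // EmitAtomicBinary64 expands the 64-bit atomic pseudos around an
        // ldrexd/strexd loop.  Thumb2 addresses the two halves directly, while
        // ARM mode marshals them through a GPRPair register.  IsCmpxchg adds
        // an early compare-and-exit on each half, and IsMinMax branches
        // straight to the exit block (leaving memory unchanged) when CC holds.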
   6000 MachineBasicBlock *
   6001 ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
   6002                                       unsigned Op1, unsigned Op2,
   6003                                       bool NeedsCarry, bool IsCmpxchg,
   6004                                       bool IsMinMax, ARMCC::CondCodes CC) const {
   6005   // This also handles ATOMIC_SWAP, indicated by Op1==0.
   6006   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   6007 
   6008   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   6009   MachineFunction *MF = BB->getParent();
   6010   MachineFunction::iterator It = BB;
   6011   ++It;
   6012 
   6013   unsigned destlo = MI->getOperand(0).getReg();
   6014   unsigned desthi = MI->getOperand(1).getReg();
   6015   unsigned ptr = MI->getOperand(2).getReg();
   6016   unsigned vallo = MI->getOperand(3).getReg();
   6017   unsigned valhi = MI->getOperand(4).getReg();
   6018   DebugLoc dl = MI->getDebugLoc();
   6019   bool isThumb2 = Subtarget->isThumb2();
   6020 
   6021   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   6022   if (isThumb2) {
   6023     MRI.constrainRegClass(destlo, &ARM::rGPRRegClass);
   6024     MRI.constrainRegClass(desthi, &ARM::rGPRRegClass);
   6025     MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
   6026   }
   6027 
   6028   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   6029   MachineBasicBlock *contBB = 0, *cont2BB = 0;
   6030   if (IsCmpxchg || IsMinMax)
   6031     contBB = MF->CreateMachineBasicBlock(LLVM_BB);
   6032   if (IsCmpxchg)
   6033     cont2BB = MF->CreateMachineBasicBlock(LLVM_BB);
   6034   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   6035 
   6036   MF->insert(It, loopMBB);
   6037   if (IsCmpxchg || IsMinMax) MF->insert(It, contBB);
   6038   if (IsCmpxchg) MF->insert(It, cont2BB);
   6039   MF->insert(It, exitMBB);
   6040 
   6041   // Transfer the remainder of BB and its successor edges to exitMBB.
   6042   exitMBB->splice(exitMBB->begin(), BB,
   6043                   llvm::next(MachineBasicBlock::iterator(MI)),
   6044                   BB->end());
   6045   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   6046 
   6047   const TargetRegisterClass *TRC = isThumb2 ?
   6048     (const TargetRegisterClass*)&ARM::tGPRRegClass :
   6049     (const TargetRegisterClass*)&ARM::GPRRegClass;
   6050   unsigned storesuccess = MRI.createVirtualRegister(TRC);
   6051 
   6052   //  thisMBB:
   6053   //   ...
   6054   //   fallthrough --> loopMBB
   6055   BB->addSuccessor(loopMBB);
   6056 
   6057   //  loopMBB:
   6058   //   ldrexd r2, r3, ptr
   6059   //   <binopa> r0, r2, incr
   6060   //   <binopb> r1, r3, incr
   6061   //   strexd storesuccess, r0, r1, ptr
   6062   //   cmp storesuccess, #0
   6063   //   bne- loopMBB
   6064   //   fallthrough --> exitMBB
   6065   BB = loopMBB;
   6066 
   6067   // Load
   6068   if (isThumb2) {
   6069     AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
   6070                    .addReg(destlo, RegState::Define)
   6071                    .addReg(desthi, RegState::Define)
   6072                    .addReg(ptr));
   6073   } else {
   6074     unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   6075     AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
   6076                    .addReg(GPRPair0, RegState::Define).addReg(ptr));
   6077     // Copy r2/r3 into dest.  (This copy will normally be coalesced.)
   6078     BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
   6079       .addReg(GPRPair0, 0, ARM::gsub_0);
   6080     BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
   6081       .addReg(GPRPair0, 0, ARM::gsub_1);
   6082   }
   6083 
   6084   unsigned StoreLo, StoreHi;
   6085   if (IsCmpxchg) {
   6086     // Add early exit
   6087     for (unsigned i = 0; i < 2; i++) {
   6088       AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr :
   6089                                                          ARM::CMPrr))
   6090                      .addReg(i == 0 ? destlo : desthi)
   6091                      .addReg(i == 0 ? vallo : valhi));
   6092       BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   6093         .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   6094       BB->addSuccessor(exitMBB);
   6095       BB->addSuccessor(i == 0 ? contBB : cont2BB);
   6096       BB = (i == 0 ? contBB : cont2BB);
   6097     }
   6098 
   6099     // Copy to physregs for strexd
   6100     StoreLo = MI->getOperand(5).getReg();
   6101     StoreHi = MI->getOperand(6).getReg();
   6102   } else if (Op1) {
   6103     // Perform binary operation
   6104     unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
   6105     AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo)
   6106                    .addReg(destlo).addReg(vallo))
   6107         .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry));
   6108     unsigned tmpRegHi = MRI.createVirtualRegister(TRC);
   6109     AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi)
   6110                    .addReg(desthi).addReg(valhi))
   6111         .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
   6112 
   6113     StoreLo = tmpRegLo;
   6114     StoreHi = tmpRegHi;
   6115   } else {
   6116     // Copy to physregs for strexd
   6117     StoreLo = vallo;
   6118     StoreHi = valhi;
   6119   }
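          // For the 64-bit min/max pseudos the flags set by the Op1/Op2 pair
          // above decide whether the value already in memory should win: if CC
          // holds, branch straight to the exit block without storing; otherwise
          // fall through to contBB and store vallo/valhi.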
   6120   if (IsMinMax) {
   6121     // Compare and branch to exit block.
   6122     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   6123       .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR);
   6124     BB->addSuccessor(exitMBB);
   6125     BB->addSuccessor(contBB);
   6126     BB = contBB;
   6127     StoreLo = vallo;
   6128     StoreHi = valhi;
   6129   }
   6130 
   6131   // Store
   6132   if (isThumb2) {
   6133     AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
   6134                    .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
   6135   } else {
   6136     // Marshal a pair...
   6137     unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   6138     unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   6139     unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   6140     BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
   6141     BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
   6142       .addReg(UndefPair)
   6143       .addReg(StoreLo)
   6144       .addImm(ARM::gsub_0);
   6145     BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
   6146       .addReg(r1)
   6147       .addReg(StoreHi)
   6148       .addImm(ARM::gsub_1);
   6149 
   6150     // ...and store it
   6151     AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
   6152                    .addReg(StorePair).addReg(ptr));
   6153   }
   6154   // Cmp+jump
   6155   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   6156                  .addReg(storesuccess).addImm(0));
   6157   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   6158     .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   6159 
   6160   BB->addSuccessor(loopMBB);
   6161   BB->addSuccessor(exitMBB);
   6162 
   6163   //  exitMBB:
   6164   //   ...
   6165   BB = exitMBB;
   6166 
   6167   MI->eraseFromParent();   // The instruction is gone now.
   6168 
   6169   return BB;
   6170 }
   6171 
   6172 /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and
   6173 /// registers the function context.
   6174 void ARMTargetLowering::
   6175 SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB,
   6176                        MachineBasicBlock *DispatchBB, int FI) const {
   6177   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   6178   DebugLoc dl = MI->getDebugLoc();
   6179   MachineFunction *MF = MBB->getParent();
   6180   MachineRegisterInfo *MRI = &MF->getRegInfo();
   6181   MachineConstantPool *MCP = MF->getConstantPool();
   6182   ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
   6183   const Function *F = MF->getFunction();
   6184 
   6185   bool isThumb = Subtarget->isThumb();
   6186   bool isThumb2 = Subtarget->isThumb2();
   6187 
   6188   unsigned PCLabelId = AFI->createPICLabelUId();
   6189   unsigned PCAdj = (isThumb || isThumb2) ? 4 : 8;
   6190   ARMConstantPoolValue *CPV =
   6191     ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj);
   6192   unsigned CPI = MCP->getConstantPoolIndex(CPV, 4);
   6193 
   6194   const TargetRegisterClass *TRC = isThumb ?
   6195     (const TargetRegisterClass*)&ARM::tGPRRegClass :
   6196     (const TargetRegisterClass*)&ARM::GPRRegClass;
   6197 
   6198   // Grab constant pool and fixed stack memory operands.
   6199   MachineMemOperand *CPMMO =
   6200     MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(),
   6201                              MachineMemOperand::MOLoad, 4, 4);
   6202 
   6203   MachineMemOperand *FIMMOSt =
   6204     MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
   6205                              MachineMemOperand::MOStore, 4, 4);
   6206 
   6207   // Load the address of the dispatch MBB into the jump buffer.
   6208   if (isThumb2) {
   6209     // Incoming value: jbuf
   6210     //   ldr.n  r5, LCPI1_1
   6211     //   orr    r5, r5, #1
   6212     //   add    r5, pc
   6213     //   str    r5, [$jbuf, #+4] ; &jbuf[1]
   6214     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
   6215     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1)
   6216                    .addConstantPoolIndex(CPI)
   6217                    .addMemOperand(CPMMO));
   6218     // Set the low bit because of thumb mode.
   6219     unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
   6220     AddDefaultCC(
   6221       AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2)
   6222                      .addReg(NewVReg1, RegState::Kill)
   6223                      .addImm(0x01)));
   6224     unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
   6225     BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3)
   6226       .addReg(NewVReg2, RegState::Kill)
   6227       .addImm(PCLabelId);
   6228     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12))
   6229                    .addReg(NewVReg3, RegState::Kill)
   6230                    .addFrameIndex(FI)
   6231                    .addImm(36)  // &jbuf[1] :: pc
   6232                    .addMemOperand(FIMMOSt));
   6233   } else if (isThumb) {
   6234     // Incoming value: jbuf
   6235     //   ldr.n  r1, LCPI1_4
   6236     //   add    r1, pc
   6237     //   mov    r2, #1
   6238     //   orrs   r1, r2
   6239     //   add    r2, $jbuf, #+4 ; &jbuf[1]
   6240     //   str    r1, [r2]
   6241     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
   6242     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1)
   6243                    .addConstantPoolIndex(CPI)
   6244                    .addMemOperand(CPMMO));
   6245     unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
   6246     BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2)
   6247       .addReg(NewVReg1, RegState::Kill)
   6248       .addImm(PCLabelId);
   6249     // Set the low bit because of thumb mode.
   6250     unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
   6251     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3)
   6252                    .addReg(ARM::CPSR, RegState::Define)
   6253                    .addImm(1));
   6254     unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
   6255     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4)
   6256                    .addReg(ARM::CPSR, RegState::Define)
   6257                    .addReg(NewVReg2, RegState::Kill)
   6258                    .addReg(NewVReg3, RegState::Kill));
   6259     unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
   6260     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5)
   6261                    .addFrameIndex(FI)
   6262                    .addImm(36)); // &jbuf[1] :: pc
   6263     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi))
   6264                    .addReg(NewVReg4, RegState::Kill)
   6265                    .addReg(NewVReg5, RegState::Kill)
   6266                    .addImm(0)
   6267                    .addMemOperand(FIMMOSt));
   6268   } else {
   6269     // Incoming value: jbuf
   6270     //   ldr  r1, LCPI1_1
   6271     //   add  r1, pc, r1
   6272     //   str  r1, [$jbuf, #+4] ; &jbuf[1]
   6273     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
   6274     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12),  NewVReg1)
   6275                    .addConstantPoolIndex(CPI)
   6276                    .addImm(0)
   6277                    .addMemOperand(CPMMO));
   6278     unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
   6279     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2)
   6280                    .addReg(NewVReg1, RegState::Kill)
   6281                    .addImm(PCLabelId));
   6282     AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12))
   6283                    .addReg(NewVReg2, RegState::Kill)
   6284                    .addFrameIndex(FI)
   6285                    .addImm(36)  // &jbuf[1] :: pc
   6286                    .addMemOperand(FIMMOSt));
   6287   }
   6288 }
   6289 
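        // EmitSjLjDispatchBlock builds the shared SjLj exception dispatch
        // block: it collects the call-site-to-landing-pad mapping recorded in
        // MachineModuleInfo, emits a jump table over the landing pads indexed
        // by the call-site value reloaded from the function context, bounds-
        // checks that value (trapping when it is out of range), and rewires
        // every invoke block's landing-pad edge through the new dispatch block.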
   6290 MachineBasicBlock *ARMTargetLowering::
   6291 EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const {
   6292   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   6293   DebugLoc dl = MI->getDebugLoc();
   6294   MachineFunction *MF = MBB->getParent();
   6295   MachineRegisterInfo *MRI = &MF->getRegInfo();
   6296   ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>();
   6297   MachineFrameInfo *MFI = MF->getFrameInfo();
   6298   int FI = MFI->getFunctionContextIndex();
   6299 
   6300   const TargetRegisterClass *TRC = Subtarget->isThumb() ?
   6301     (const TargetRegisterClass*)&ARM::tGPRRegClass :
   6302     (const TargetRegisterClass*)&ARM::GPRnopcRegClass;
   6303 
   6304   // Get a mapping of the call site numbers to all of the landing pads they're
   6305   // associated with.
   6306   DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
   6307   unsigned MaxCSNum = 0;
   6308   MachineModuleInfo &MMI = MF->getMMI();
   6309   for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
   6310        ++BB) {
   6311     if (!BB->isLandingPad()) continue;
   6312 
   6313     // FIXME: We should assert that the EH_LABEL is the first MI in the landing
   6314     // pad.
   6315     for (MachineBasicBlock::iterator
   6316            II = BB->begin(), IE = BB->end(); II != IE; ++II) {
   6317       if (!II->isEHLabel()) continue;
   6318 
   6319       MCSymbol *Sym = II->getOperand(0).getMCSymbol();
   6320       if (!MMI.hasCallSiteLandingPad(Sym)) continue;
   6321 
   6322       SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
   6323       for (SmallVectorImpl<unsigned>::iterator
   6324              CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
   6325            CSI != CSE; ++CSI) {
   6326         CallSiteNumToLPad[*CSI].push_back(BB);
   6327         MaxCSNum = std::max(MaxCSNum, *CSI);
   6328       }
   6329       break;
   6330     }
   6331   }
   6332 
   6333   // Get an ordered list of the machine basic blocks for the jump table.
   6334   std::vector<MachineBasicBlock*> LPadList;
   6335   SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
   6336   LPadList.reserve(CallSiteNumToLPad.size());
   6337   for (unsigned I = 1; I <= MaxCSNum; ++I) {
   6338     SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
   6339     for (SmallVectorImpl<MachineBasicBlock*>::iterator
   6340            II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
   6341       LPadList.push_back(*II);
   6342       InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
   6343     }
   6344   }
   6345 
   6346   assert(!LPadList.empty() &&
   6347          "No landing pad destinations for the dispatch jump table!");
   6348 
   6349   // Create the jump table and associated information.
   6350   MachineJumpTableInfo *JTI =
   6351     MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
   6352   unsigned MJTI = JTI->createJumpTableIndex(LPadList);
   6353   unsigned UId = AFI->createJumpTableUId();
   6354   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   6355 
   6356   // Create the MBBs for the dispatch code.
   6357 
   6358   // Shove the dispatch's address into the return slot in the function context.
   6359   MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
   6360   DispatchBB->setIsLandingPad();
   6361 
   6362   MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
   6363   unsigned trap_opcode;
   6364   if (Subtarget->isThumb())
   6365     trap_opcode = ARM::tTRAP;
   6366   else
   6367     trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;
   6368 
   6369   BuildMI(TrapBB, dl, TII->get(trap_opcode));
   6370   DispatchBB->addSuccessor(TrapBB);
   6371 
   6372   MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
   6373   DispatchBB->addSuccessor(DispContBB);
   6374 
   6375   // Insert the MBBs into the function.
   6376   MF->insert(MF->end(), DispatchBB);
   6377   MF->insert(MF->end(), DispContBB);
   6378   MF->insert(MF->end(), TrapBB);
   6379 
   6380   // Insert code into the entry block that creates and registers the function
   6381   // context.
   6382   SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);
   6383 
   6384   MachineMemOperand *FIMMOLd =
   6385     MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
   6386                              MachineMemOperand::MOLoad |
   6387                              MachineMemOperand::MOVolatile, 4, 4);
   6388 
   6389   MachineInstrBuilder MIB;
   6390   MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));
   6391 
   6392   const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
   6393   const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
   6394 
   6395   // Add a register mask with no preserved registers.  This results in all
   6396   // registers being marked as clobbered.
   6397   MIB.addRegMask(RI.getNoPreservedMask());
   6398 
   6399   unsigned NumLPads = LPadList.size();
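          // Reload the call-site value from the function context and compare it
          // against the jump table size; anything above NumLPads is routed to
          // TrapBB.  Counts too large for an immediate compare are materialized
          // with movw/movt or a constant-pool load first.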
   6400   if (Subtarget->isThumb2()) {
   6401     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
   6402     AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1)
   6403                    .addFrameIndex(FI)
   6404                    .addImm(4)
   6405                    .addMemOperand(FIMMOLd));
   6406 
   6407     if (NumLPads < 256) {
   6408       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri))
   6409                      .addReg(NewVReg1)
   6410                      .addImm(LPadList.size()));
   6411     } else {
   6412       unsigned VReg1 = MRI->createVirtualRegister(TRC);
   6413       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1)
   6414                      .addImm(NumLPads & 0xFFFF));
   6415 
   6416       unsigned VReg2 = VReg1;
   6417       if ((NumLPads & 0xFFFF0000) != 0) {
   6418         VReg2 = MRI->createVirtualRegister(TRC);
   6419         AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2)
   6420                        .addReg(VReg1)
   6421                        .addImm(NumLPads >> 16));
   6422       }
   6423 
   6424       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr))
   6425                      .addReg(NewVReg1)
   6426                      .addReg(VReg2));
   6427     }
   6428 
   6429     BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc))
   6430       .addMBB(TrapBB)
   6431       .addImm(ARMCC::HI)
   6432       .addReg(ARM::CPSR);
   6433 
   6434     unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
   6435     AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3)
   6436                    .addJumpTableIndex(MJTI)
   6437                    .addImm(UId));
   6438 
   6439     unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
   6440     AddDefaultCC(
   6441       AddDefaultPred(
   6442         BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4)
   6443         .addReg(NewVReg3, RegState::Kill)
   6444         .addReg(NewVReg1)
   6445         .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
   6446 
   6447     BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT))
   6448       .addReg(NewVReg4, RegState::Kill)
   6449       .addReg(NewVReg1)
   6450       .addJumpTableIndex(MJTI)
   6451       .addImm(UId);
   6452   } else if (Subtarget->isThumb()) {
   6453     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
   6454     AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1)
   6455                    .addFrameIndex(FI)
   6456                    .addImm(1)
   6457                    .addMemOperand(FIMMOLd));
   6458 
   6459     if (NumLPads < 256) {
   6460       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8))
   6461                      .addReg(NewVReg1)
   6462                      .addImm(NumLPads));
   6463     } else {
   6464       MachineConstantPool *ConstantPool = MF->getConstantPool();
   6465       Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
   6466       const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
   6467 
   6468       // MachineConstantPool wants an explicit alignment.
   6469       unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
   6470       if (Align == 0)
   6471         Align = getDataLayout()->getTypeAllocSize(C->getType());
   6472       unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
   6473 
   6474       unsigned VReg1 = MRI->createVirtualRegister(TRC);
   6475       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci))
   6476                      .addReg(VReg1, RegState::Define)
   6477                      .addConstantPoolIndex(Idx));
   6478       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr))
   6479                      .addReg(NewVReg1)
   6480                      .addReg(VReg1));
   6481     }
   6482 
   6483     BuildMI(DispatchBB, dl, TII->get(ARM::tBcc))
   6484       .addMBB(TrapBB)
   6485       .addImm(ARMCC::HI)
   6486       .addReg(ARM::CPSR);
   6487 
   6488     unsigned NewVReg2 = MRI->createVirtualRegister(TRC);
   6489     AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2)
   6490                    .addReg(ARM::CPSR, RegState::Define)
   6491                    .addReg(NewVReg1)
   6492                    .addImm(2));
   6493 
   6494     unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
   6495     AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3)
   6496                    .addJumpTableIndex(MJTI)
   6497                    .addImm(UId));
   6498 
   6499     unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
   6500     AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4)
   6501                    .addReg(ARM::CPSR, RegState::Define)
   6502                    .addReg(NewVReg2, RegState::Kill)
   6503                    .addReg(NewVReg3));
   6504 
   6505     MachineMemOperand *JTMMOLd =
   6506       MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
   6507                                MachineMemOperand::MOLoad, 4, 4);
   6508 
   6509     unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
   6510     AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5)
   6511                    .addReg(NewVReg4, RegState::Kill)
   6512                    .addImm(0)
   6513                    .addMemOperand(JTMMOLd));
   6514 
   6515     unsigned NewVReg6 = NewVReg5;
   6516     if (RelocM == Reloc::PIC_) {
   6517       NewVReg6 = MRI->createVirtualRegister(TRC);
   6518       AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6)
   6519                      .addReg(ARM::CPSR, RegState::Define)
   6520                      .addReg(NewVReg5, RegState::Kill)
   6521                      .addReg(NewVReg3));
   6522     }
   6523 
   6524     BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr))
   6525       .addReg(NewVReg6, RegState::Kill)
   6526       .addJumpTableIndex(MJTI)
   6527       .addImm(UId);
   6528   } else {
   6529     unsigned NewVReg1 = MRI->createVirtualRegister(TRC);
   6530     AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1)
   6531                    .addFrameIndex(FI)
   6532                    .addImm(4)
   6533                    .addMemOperand(FIMMOLd));
   6534 
   6535     if (NumLPads < 256) {
   6536       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri))
   6537                      .addReg(NewVReg1)
   6538                      .addImm(NumLPads));
   6539     } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) {
   6540       unsigned VReg1 = MRI->createVirtualRegister(TRC);
   6541       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1)
   6542                      .addImm(NumLPads & 0xFFFF));
   6543 
   6544       unsigned VReg2 = VReg1;
   6545       if ((NumLPads & 0xFFFF0000) != 0) {
   6546         VReg2 = MRI->createVirtualRegister(TRC);
   6547         AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2)
   6548                        .addReg(VReg1)
   6549                        .addImm(NumLPads >> 16));
   6550       }
   6551 
   6552       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
   6553                      .addReg(NewVReg1)
   6554                      .addReg(VReg2));
   6555     } else {
   6556       MachineConstantPool *ConstantPool = MF->getConstantPool();
   6557       Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
   6558       const Constant *C = ConstantInt::get(Int32Ty, NumLPads);
   6559 
   6560       // MachineConstantPool wants an explicit alignment.
   6561       unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
   6562       if (Align == 0)
   6563         Align = getDataLayout()->getTypeAllocSize(C->getType());
   6564       unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
   6565 
   6566       unsigned VReg1 = MRI->createVirtualRegister(TRC);
   6567       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp))
   6568                      .addReg(VReg1, RegState::Define)
   6569                      .addConstantPoolIndex(Idx)
   6570                      .addImm(0));
   6571       AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr))
   6572                      .addReg(NewVReg1)
   6573                      .addReg(VReg1, RegState::Kill));
   6574     }
   6575 
   6576     BuildMI(DispatchBB, dl, TII->get(ARM::Bcc))
   6577       .addMBB(TrapBB)
   6578       .addImm(ARMCC::HI)
   6579       .addReg(ARM::CPSR);
   6580 
   6581     unsigned NewVReg3 = MRI->createVirtualRegister(TRC);
   6582     AddDefaultCC(
   6583       AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3)
   6584                      .addReg(NewVReg1)
   6585                      .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2))));
   6586     unsigned NewVReg4 = MRI->createVirtualRegister(TRC);
   6587     AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4)
   6588                    .addJumpTableIndex(MJTI)
   6589                    .addImm(UId));
   6590 
   6591     MachineMemOperand *JTMMOLd =
   6592       MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(),
   6593                                MachineMemOperand::MOLoad, 4, 4);
   6594     unsigned NewVReg5 = MRI->createVirtualRegister(TRC);
   6595     AddDefaultPred(
   6596       BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5)
   6597       .addReg(NewVReg3, RegState::Kill)
   6598       .addReg(NewVReg4)
   6599       .addImm(0)
   6600       .addMemOperand(JTMMOLd));
   6601 
   6602     if (RelocM == Reloc::PIC_) {
   6603       BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd))
   6604         .addReg(NewVReg5, RegState::Kill)
   6605         .addReg(NewVReg4)
   6606         .addJumpTableIndex(MJTI)
   6607         .addImm(UId);
   6608     } else {
   6609       BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr))
   6610         .addReg(NewVReg5, RegState::Kill)
   6611         .addJumpTableIndex(MJTI)
   6612         .addImm(UId);
   6613     }
   6614   }
   6615 
   6616   // Add the jump table entries as successors to the MBB.
   6617   SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs;
   6618   for (std::vector<MachineBasicBlock*>::iterator
   6619          I = LPadList.begin(), E = LPadList.end(); I != E; ++I) {
   6620     MachineBasicBlock *CurMBB = *I;
   6621     if (SeenMBBs.insert(CurMBB))
   6622       DispContBB->addSuccessor(CurMBB);
   6623   }
   6624 
   6625   // N.B. the order the invoke BBs are processed in doesn't matter here.
   6626   const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF);
   6627   SmallVector<MachineBasicBlock*, 64> MBBLPads;
   6628   for (SmallPtrSet<MachineBasicBlock*, 64>::iterator
   6629          I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) {
   6630     MachineBasicBlock *BB = *I;
   6631 
   6632     // Remove the landing pad successor from the invoke block and replace it
   6633     // with the new dispatch block.
   6634     SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(),
   6635                                                   BB->succ_end());
   6636     while (!Successors.empty()) {
   6637       MachineBasicBlock *SMBB = Successors.pop_back_val();
   6638       if (SMBB->isLandingPad()) {
   6639         BB->removeSuccessor(SMBB);
   6640         MBBLPads.push_back(SMBB);
   6641       }
   6642     }
   6643 
   6644     BB->addSuccessor(DispatchBB);
   6645 
   6646     // Find the invoke call and mark all of the callee-saved registers as
   6647     // 'implicit defined' so that they're spilled. This prevents later passes
   6648     // from moving instructions to before the EH block, where they would never
   6649     // be executed.
   6650     for (MachineBasicBlock::reverse_iterator
   6651            II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) {
   6652       if (!II->isCall()) continue;
   6653 
   6654       DenseMap<unsigned, bool> DefRegs;
   6655       for (MachineInstr::mop_iterator
   6656              OI = II->operands_begin(), OE = II->operands_end();
   6657            OI != OE; ++OI) {
   6658         if (!OI->isReg()) continue;
   6659         DefRegs[OI->getReg()] = true;
   6660       }
   6661 
   6662       MachineInstrBuilder MIB(*MF, &*II);
   6663 
   6664       for (unsigned i = 0; SavedRegs[i] != 0; ++i) {
   6665         unsigned Reg = SavedRegs[i];
   6666         if (Subtarget->isThumb2() &&
   6667             !ARM::tGPRRegClass.contains(Reg) &&
   6668             !ARM::hGPRRegClass.contains(Reg))
   6669           continue;
   6670         if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg))
   6671           continue;
   6672         if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg))
   6673           continue;
   6674         if (!DefRegs[Reg])
   6675           MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
   6676       }
   6677 
   6678       break;
   6679     }
   6680   }
   6681 
   6682   // Mark all former landing pads as non-landing pads. The dispatch is the only
   6683   // landing pad now.
   6684   for (SmallVectorImpl<MachineBasicBlock*>::iterator
   6685          I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I)
   6686     (*I)->setIsLandingPad(false);
   6687 
   6688   // The instruction is gone now.
   6689   MI->eraseFromParent();
   6690 
   6691   return MBB;
   6692 }
   6693 
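        /// OtherSucc - Return the successor of MBB that is not Succ; MBB is
        /// expected to have exactly two successors.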
   6694 static
   6695 MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
   6696   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
   6697        E = MBB->succ_end(); I != E; ++I)
   6698     if (*I != Succ)
   6699       return *I;
   6700   llvm_unreachable("Expecting a BB with two successors!");
   6701 }
   6702 
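        // EmitStructByval expands the struct-byval copy pseudo.  Copies no
        // larger than getMaxInlineSizeThreshold() are fully unrolled as
        // post-incrementing load/store pairs (NEON VLD1/VST1 when size and
        // alignment allow); larger copies become a counted loop followed by a
        // byte-wise epilogue for the leftover bytes.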
   6703 MachineBasicBlock *ARMTargetLowering::
   6704 EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const {
   6705   // This pseudo instruction carries dst, src, size and alignment operands.
   6706   // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold().
   6707   // Otherwise, we generate unrolled scalar copies.
   6708   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   6709   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   6710   MachineFunction::iterator It = BB;
   6711   ++It;
   6712 
   6713   unsigned dest = MI->getOperand(0).getReg();
   6714   unsigned src = MI->getOperand(1).getReg();
   6715   unsigned SizeVal = MI->getOperand(2).getImm();
   6716   unsigned Align = MI->getOperand(3).getImm();
   6717   DebugLoc dl = MI->getDebugLoc();
   6718 
   6719   bool isThumb2 = Subtarget->isThumb2();
   6720   MachineFunction *MF = BB->getParent();
   6721   MachineRegisterInfo &MRI = MF->getRegInfo();
   6722   unsigned ldrOpc, strOpc, UnitSize = 0;
   6723 
   6724   const TargetRegisterClass *TRC = isThumb2 ?
   6725     (const TargetRegisterClass*)&ARM::tGPRRegClass :
   6726     (const TargetRegisterClass*)&ARM::GPRRegClass;
   6727   const TargetRegisterClass *TRC_Vec = 0;
   6728 
   6729   if (Align & 1) {
   6730     ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
   6731     strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
   6732     UnitSize = 1;
   6733   } else if (Align & 2) {
   6734     ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST;
   6735     strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST;
   6736     UnitSize = 2;
   6737   } else {
   6738     // Check whether we can use NEON instructions.
   6739     if (!MF->getFunction()->getAttributes().
   6740           hasAttribute(AttributeSet::FunctionIndex,
   6741                        Attribute::NoImplicitFloat) &&
   6742         Subtarget->hasNEON()) {
   6743       if ((Align % 16 == 0) && SizeVal >= 16) {
   6744         ldrOpc = ARM::VLD1q32wb_fixed;
   6745         strOpc = ARM::VST1q32wb_fixed;
   6746         UnitSize = 16;
   6747         TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass;
   6748       }
   6749       else if ((Align % 8 == 0) && SizeVal >= 8) {
   6750         ldrOpc = ARM::VLD1d32wb_fixed;
   6751         strOpc = ARM::VST1d32wb_fixed;
   6752         UnitSize = 8;
   6753         TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass;
   6754       }
   6755     }
   6756     // Can't use NEON instructions.
   6757     if (UnitSize == 0) {
   6758       ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
   6759       strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM;
   6760       UnitSize = 4;
   6761     }
   6762   }
   6763 
   6764   unsigned BytesLeft = SizeVal % UnitSize;
   6765   unsigned LoopSize = SizeVal - BytesLeft;
   6766 
   6767   if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) {
   6768     // Use LDR and STR to copy.
   6769     // [scratch, srcOut] = LDR_POST(srcIn, UnitSize)
   6770     // [destOut] = STR_POST(scratch, destIn, UnitSize)
   6771     unsigned srcIn = src;
   6772     unsigned destIn = dest;
   6773     for (unsigned i = 0; i < LoopSize; i+=UnitSize) {
   6774       unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
   6775       unsigned srcOut = MRI.createVirtualRegister(TRC);
   6776       unsigned destOut = MRI.createVirtualRegister(TRC);
   6777       if (UnitSize >= 8) {
   6778         AddDefaultPred(BuildMI(*BB, MI, dl,
   6779           TII->get(ldrOpc), scratch)
   6780           .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0));
   6781 
   6782         AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
   6783           .addReg(destIn).addImm(0).addReg(scratch));
   6784       } else if (isThumb2) {
   6785         AddDefaultPred(BuildMI(*BB, MI, dl,
   6786           TII->get(ldrOpc), scratch)
   6787           .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize));
   6788 
   6789         AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
   6790           .addReg(scratch).addReg(destIn)
   6791           .addImm(UnitSize));
   6792       } else {
   6793         AddDefaultPred(BuildMI(*BB, MI, dl,
   6794           TII->get(ldrOpc), scratch)
   6795           .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0)
   6796           .addImm(UnitSize));
   6797 
   6798         AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
   6799           .addReg(scratch).addReg(destIn)
   6800           .addReg(0).addImm(UnitSize));
   6801       }
   6802       srcIn = srcOut;
   6803       destIn = destOut;
   6804     }
   6805 
   6806     // Handle the leftover bytes with LDRB and STRB.
   6807     // [scratch, srcOut] = LDRB_POST(srcIn, 1)
   6808     // [destOut] = STRB_POST(scratch, destIn, 1)
   6809     ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
   6810     strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
   6811     for (unsigned i = 0; i < BytesLeft; i++) {
   6812       unsigned scratch = MRI.createVirtualRegister(TRC);
   6813       unsigned srcOut = MRI.createVirtualRegister(TRC);
   6814       unsigned destOut = MRI.createVirtualRegister(TRC);
   6815       if (isThumb2) {
   6816         AddDefaultPred(BuildMI(*BB, MI, dl,
   6817           TII->get(ldrOpc),scratch)
   6818           .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
   6819 
   6820         AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
   6821           .addReg(scratch).addReg(destIn)
   6822           .addReg(0).addImm(1));
   6823       } else {
   6824         AddDefaultPred(BuildMI(*BB, MI, dl,
   6825           TII->get(ldrOpc),scratch)
   6826           .addReg(srcOut, RegState::Define).addReg(srcIn)
   6827           .addReg(0).addImm(1));
   6828 
   6829         AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
   6830           .addReg(scratch).addReg(destIn)
   6831           .addReg(0).addImm(1));
   6832       }
   6833       srcIn = srcOut;
   6834       destIn = destOut;
   6835     }
   6836     MI->eraseFromParent();   // The instruction is gone now.
   6837     return BB;
   6838   }
   6839 
   6840   // Expand the pseudo op to a loop.
   6841   // thisMBB:
   6842   //   ...
   6843   //   movw varEnd, # --> with thumb2
   6844   //   movt varEnd, #
   6845   //   ldrcp varEnd, idx --> without thumb2
   6846   //   fallthrough --> loopMBB
   6847   // loopMBB:
   6848   //   PHI varPhi, varEnd, varLoop
   6849   //   PHI srcPhi, src, srcLoop
   6850   //   PHI destPhi, dst, destLoop
   6851   //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
   6852   //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
   6853   //   subs varLoop, varPhi, #UnitSize
   6854   //   bne loopMBB
   6855   //   fallthrough --> exitMBB
   6856   // exitMBB:
   6857   //   epilogue to handle left-over bytes
   6858   //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
   6859   //   [destOut] = STRB_POST(scratch, destLoop, 1)
   6860   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   6861   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   6862   MF->insert(It, loopMBB);
   6863   MF->insert(It, exitMBB);
   6864 
   6865   // Transfer the remainder of BB and its successor edges to exitMBB.
   6866   exitMBB->splice(exitMBB->begin(), BB,
   6867                   llvm::next(MachineBasicBlock::iterator(MI)),
   6868                   BB->end());
   6869   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   6870 
   6871   // Load an immediate to varEnd.
   6872   unsigned varEnd = MRI.createVirtualRegister(TRC);
   6873   if (isThumb2) {
   6874     unsigned VReg1 = varEnd;
   6875     if ((LoopSize & 0xFFFF0000) != 0)
   6876       VReg1 = MRI.createVirtualRegister(TRC);
   6877     AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
   6878                    .addImm(LoopSize & 0xFFFF));
   6879 
   6880     if ((LoopSize & 0xFFFF0000) != 0)
   6881       AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
   6882                      .addReg(VReg1)
   6883                      .addImm(LoopSize >> 16));
   6884   } else {
   6885     MachineConstantPool *ConstantPool = MF->getConstantPool();
   6886     Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
   6887     const Constant *C = ConstantInt::get(Int32Ty, LoopSize);
   6888 
   6889     // MachineConstantPool wants an explicit alignment.
   6890     unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
   6891     if (Align == 0)
   6892       Align = getDataLayout()->getTypeAllocSize(C->getType());
   6893     unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
   6894 
   6895     AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
   6896                    .addReg(varEnd, RegState::Define)
   6897                    .addConstantPoolIndex(Idx)
   6898                    .addImm(0));
   6899   }
   6900   BB->addSuccessor(loopMBB);
   6901 
   6902   // Generate the loop body:
   6903   //   varPhi = PHI(varLoop, varEnd)
   6904   //   srcPhi = PHI(srcLoop, src)
   6905   //   destPhi = PHI(destLoop, dst)
   6906   MachineBasicBlock *entryBB = BB;
   6907   BB = loopMBB;
   6908   unsigned varLoop = MRI.createVirtualRegister(TRC);
   6909   unsigned varPhi = MRI.createVirtualRegister(TRC);
   6910   unsigned srcLoop = MRI.createVirtualRegister(TRC);
   6911   unsigned srcPhi = MRI.createVirtualRegister(TRC);
   6912   unsigned destLoop = MRI.createVirtualRegister(TRC);
   6913   unsigned destPhi = MRI.createVirtualRegister(TRC);
   6914 
   6915   BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
   6916     .addReg(varLoop).addMBB(loopMBB)
   6917     .addReg(varEnd).addMBB(entryBB);
   6918   BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
   6919     .addReg(srcLoop).addMBB(loopMBB)
   6920     .addReg(src).addMBB(entryBB);
   6921   BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
   6922     .addReg(destLoop).addMBB(loopMBB)
   6923     .addReg(dest).addMBB(entryBB);
   6924 
   6925   //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
   6926   //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
   6927   unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC);
   6928   if (UnitSize >= 8) {
   6929     AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
   6930       .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));
   6931 
   6932     AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
   6933       .addReg(destPhi).addImm(0).addReg(scratch));
   6934   } else if (isThumb2) {
   6935     AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
   6936       .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));
   6937 
   6938     AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
   6939       .addReg(scratch).addReg(destPhi)
   6940       .addImm(UnitSize));
   6941   } else {
   6942     AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
   6943       .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
   6944       .addImm(UnitSize));
   6945 
   6946     AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
   6947       .addReg(scratch).addReg(destPhi)
   6948       .addReg(0).addImm(UnitSize));
   6949   }
   6950 
   6951   // Decrement loop variable by UnitSize.
   6952   MachineInstrBuilder MIB = BuildMI(BB, dl,
   6953     TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
   6954   AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
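          // Rewrite the optional cc_out operand added by AddDefaultCC into a
          // CPSR def, turning the subtract into a flag-setting SUBS so the
          // conditional branch below can test whether the counter reached zero.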
   6955   MIB->getOperand(5).setReg(ARM::CPSR);
   6956   MIB->getOperand(5).setIsDef(true);
   6957 
   6958   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   6959     .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   6960 
   6961   // loopMBB can loop back to loopMBB or fall through to exitMBB.
   6962   BB->addSuccessor(loopMBB);
   6963   BB->addSuccessor(exitMBB);
   6964 
   6965   // Add epilogue to handle BytesLeft.
   6966   BB = exitMBB;
   6967   MachineInstr *StartOfExit = exitMBB->begin();
   6968   ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
   6969   strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM;
   6970 
   6971   //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
   6972   //   [destOut] = STRB_POST(scratch, destLoop, 1)
   6973   unsigned srcIn = srcLoop;
   6974   unsigned destIn = destLoop;
   6975   for (unsigned i = 0; i < BytesLeft; i++) {
   6976     unsigned scratch = MRI.createVirtualRegister(TRC);
   6977     unsigned srcOut = MRI.createVirtualRegister(TRC);
   6978     unsigned destOut = MRI.createVirtualRegister(TRC);
   6979     if (isThumb2) {
   6980       AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
   6981         TII->get(ldrOpc),scratch)
   6982         .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));
   6983 
   6984       AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
   6985         .addReg(scratch).addReg(destIn)
   6986         .addImm(1));
   6987     } else {
   6988       AddDefaultPred(BuildMI(*BB, StartOfExit, dl,
   6989         TII->get(ldrOpc),scratch)
   6990         .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1));
   6991 
   6992       AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut)
   6993         .addReg(scratch).addReg(destIn)
   6994         .addReg(0).addImm(1));
   6995     }
   6996     srcIn = srcOut;
   6997     destIn = destOut;
   6998   }
   6999 
   7000   MI->eraseFromParent();   // The instruction is gone now.
   7001   return BB;
   7002 }
   7003 
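        // EmitInstrWithCustomInserter dispatches on the pseudo opcode; the
        // pre-indexed store pseudos are simply rewritten to the real
        // instructions, and the atomic and struct-byval pseudos are expanded
        // into the multi-block sequences built by the helpers above.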
   7004 MachineBasicBlock *
   7005 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   7006                                                MachineBasicBlock *BB) const {
   7007   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   7008   DebugLoc dl = MI->getDebugLoc();
   7009   bool isThumb2 = Subtarget->isThumb2();
   7010   switch (MI->getOpcode()) {
   7011   default: {
   7012     MI->dump();
   7013     llvm_unreachable("Unexpected instr type to insert");
   7014   }
   7015   // The Thumb2 pre-indexed store pseudos have the same MI operands as the
   7016   // real instructions; they are only needed because the .td files define
   7017   // the operands differently from the isel patterns.
   7018   case ARM::t2STR_preidx:
   7019     MI->setDesc(TII->get(ARM::t2STR_PRE));
   7020     return BB;
   7021   case ARM::t2STRB_preidx:
   7022     MI->setDesc(TII->get(ARM::t2STRB_PRE));
   7023     return BB;
   7024   case ARM::t2STRH_preidx:
   7025     MI->setDesc(TII->get(ARM::t2STRH_PRE));
   7026     return BB;
   7027 
   7028   case ARM::STRi_preidx:
   7029   case ARM::STRBi_preidx: {
   7030     unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ?
   7031       ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM;
   7032     // Decode the offset.
   7033     unsigned Offset = MI->getOperand(4).getImm();
   7034     bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub;
   7035     Offset = ARM_AM::getAM2Offset(Offset);
   7036     if (isSub)
   7037       Offset = -Offset;
   7038 
   7039     MachineMemOperand *MMO = *MI->memoperands_begin();
   7040     BuildMI(*BB, MI, dl, TII->get(NewOpc))
   7041       .addOperand(MI->getOperand(0))  // Rn_wb
   7042       .addOperand(MI->getOperand(1))  // Rt
   7043       .addOperand(MI->getOperand(2))  // Rn
   7044       .addImm(Offset)                 // offset (skip GPR==zero_reg)
   7045       .addOperand(MI->getOperand(5))  // pred
   7046       .addOperand(MI->getOperand(6))
   7047       .addMemOperand(MMO);
   7048     MI->eraseFromParent();
   7049     return BB;
   7050   }
   7051   case ARM::STRr_preidx:
   7052   case ARM::STRBr_preidx:
   7053   case ARM::STRH_preidx: {
   7054     unsigned NewOpc;
   7055     switch (MI->getOpcode()) {
   7056     default: llvm_unreachable("unexpected opcode!");
   7057     case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break;
   7058     case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break;
   7059     case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break;
   7060     }
   7061     MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc));
   7062     for (unsigned i = 0; i < MI->getNumOperands(); ++i)
   7063       MIB.addOperand(MI->getOperand(i));
   7064     MI->eraseFromParent();
   7065     return BB;
   7066   }
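         // Atomic read-modify-write pseudos: each expansion is given the access
         // size in bytes and the plain ALU opcode that combines the loaded value
         // with the operand; the swap cases further down pass 0 since no
         // combining operation is needed.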
   7067   case ARM::ATOMIC_LOAD_ADD_I8:
   7068      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
   7069   case ARM::ATOMIC_LOAD_ADD_I16:
   7070      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
   7071   case ARM::ATOMIC_LOAD_ADD_I32:
   7072      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
   7073 
   7074   case ARM::ATOMIC_LOAD_AND_I8:
   7075      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
   7076   case ARM::ATOMIC_LOAD_AND_I16:
   7077      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
   7078   case ARM::ATOMIC_LOAD_AND_I32:
   7079      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
   7080 
   7081   case ARM::ATOMIC_LOAD_OR_I8:
   7082      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
   7083   case ARM::ATOMIC_LOAD_OR_I16:
   7084      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
   7085   case ARM::ATOMIC_LOAD_OR_I32:
   7086      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
   7087 
   7088   case ARM::ATOMIC_LOAD_XOR_I8:
   7089      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
   7090   case ARM::ATOMIC_LOAD_XOR_I16:
   7091      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
   7092   case ARM::ATOMIC_LOAD_XOR_I32:
   7093      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
   7094 
   7095   case ARM::ATOMIC_LOAD_NAND_I8:
   7096      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
   7097   case ARM::ATOMIC_LOAD_NAND_I16:
   7098      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
   7099   case ARM::ATOMIC_LOAD_NAND_I32:
   7100      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
   7101 
   7102   case ARM::ATOMIC_LOAD_SUB_I8:
   7103      return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
   7104   case ARM::ATOMIC_LOAD_SUB_I16:
   7105      return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
   7106   case ARM::ATOMIC_LOAD_SUB_I32:
   7107      return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
   7108 
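         // The min/max flavours distinguish the signed forms (LT/GT) from the
         // unsigned ones (LO/HI) via the boolean and the condition code.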
   7109   case ARM::ATOMIC_LOAD_MIN_I8:
   7110      return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT);
   7111   case ARM::ATOMIC_LOAD_MIN_I16:
   7112      return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT);
   7113   case ARM::ATOMIC_LOAD_MIN_I32:
   7114      return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT);
   7115 
   7116   case ARM::ATOMIC_LOAD_MAX_I8:
   7117      return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT);
   7118   case ARM::ATOMIC_LOAD_MAX_I16:
   7119      return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT);
   7120   case ARM::ATOMIC_LOAD_MAX_I32:
   7121      return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT);
   7122 
   7123   case ARM::ATOMIC_LOAD_UMIN_I8:
   7124      return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO);
   7125   case ARM::ATOMIC_LOAD_UMIN_I16:
   7126      return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO);
   7127   case ARM::ATOMIC_LOAD_UMIN_I32:
   7128      return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO);
   7129 
   7130   case ARM::ATOMIC_LOAD_UMAX_I8:
   7131      return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI);
   7132   case ARM::ATOMIC_LOAD_UMAX_I16:
   7133      return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI);
   7134   case ARM::ATOMIC_LOAD_UMAX_I32:
   7135      return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI);
   7136 
   7137   case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
   7138   case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
   7139   case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
   7140 
   7141   case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
   7142   case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
   7143   case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
   7144 
   7145 
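         // The 64-bit atomics are expanded in two 32-bit halves: the first opcode
         // acts on the low word and the second on the high word, with NeedsCarry
         // chaining them (ADD/ADC, SUB/SBC).  The cmpxchg and min/max variants
         // reuse SUB/SBC purely as a 64-bit comparison.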
   7146   case ARM::ATOMADD6432:
   7147     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr,
   7148                               isThumb2 ? ARM::t2ADCrr : ARM::ADCrr,
   7149                               /*NeedsCarry*/ true);
   7150   case ARM::ATOMSUB6432:
   7151     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
   7152                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
   7153                               /*NeedsCarry*/ true);
   7154   case ARM::ATOMOR6432:
   7155     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr,
   7156                               isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
   7157   case ARM::ATOMXOR6432:
   7158     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr,
   7159                               isThumb2 ? ARM::t2EORrr : ARM::EORrr);
   7160   case ARM::ATOMAND6432:
   7161     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr,
   7162                               isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
   7163   case ARM::ATOMSWAP6432:
   7164     return EmitAtomicBinary64(MI, BB, 0, 0, false);
   7165   case ARM::ATOMCMPXCHG6432:
   7166     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
   7167                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
   7168                               /*NeedsCarry*/ false, /*IsCmpxchg*/true);
   7169   case ARM::ATOMMIN6432:
   7170     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
   7171                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
   7172                               /*NeedsCarry*/ true, /*IsCmpxchg*/false,
   7173                               /*IsMinMax*/ true, ARMCC::LT);
   7174   case ARM::ATOMMAX6432:
   7175     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
   7176                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
   7177                               /*NeedsCarry*/ true, /*IsCmpxchg*/false,
   7178                               /*IsMinMax*/ true, ARMCC::GE);
   7179   case ARM::ATOMUMIN6432:
   7180     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
   7181                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
   7182                               /*NeedsCarry*/ true, /*IsCmpxchg*/false,
   7183                               /*IsMinMax*/ true, ARMCC::LO);
   7184   case ARM::ATOMUMAX6432:
   7185     return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr,
   7186                               isThumb2 ? ARM::t2SBCrr : ARM::SBCrr,
   7187                               /*NeedsCarry*/ true, /*IsCmpxchg*/false,
   7188                               /*IsMinMax*/ true, ARMCC::HS);
   7189 
   7190   case ARM::tMOVCCr_pseudo: {
   7191     // To "insert" a SELECT_CC instruction, we actually have to insert the
   7192     // diamond control-flow pattern.  The incoming instruction knows the
   7193     // destination vreg to set, the condition code register to branch on, the
   7194     // true/false values to select between, and a branch opcode to use.
   7195     const BasicBlock *LLVM_BB = BB->getBasicBlock();
   7196     MachineFunction::iterator It = BB;
   7197     ++It;
   7198 
   7199     //  thisMBB:
   7200     //  ...
   7201     //   TrueVal = ...
   7202     //   cmpTY ccX, r1, r2
   7203     //   bCC copy1MBB
   7204     //   fallthrough --> copy0MBB
   7205     MachineBasicBlock *thisMBB  = BB;
   7206     MachineFunction *F = BB->getParent();
   7207     MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
   7208     MachineBasicBlock *sinkMBB  = F->CreateMachineBasicBlock(LLVM_BB);
   7209     F->insert(It, copy0MBB);
   7210     F->insert(It, sinkMBB);
   7211 
   7212     // Transfer the remainder of BB and its successor edges to sinkMBB.
   7213     sinkMBB->splice(sinkMBB->begin(), BB,
   7214                     llvm::next(MachineBasicBlock::iterator(MI)),
   7215                     BB->end());
   7216     sinkMBB->transferSuccessorsAndUpdatePHIs(BB);
   7217 
   7218     BB->addSuccessor(copy0MBB);
   7219     BB->addSuccessor(sinkMBB);
   7220 
   7221     BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB)
   7222       .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg());
   7223 
   7224     //  copy0MBB:
   7225     //   %FalseValue = ...
   7226     //   # fallthrough to sinkMBB
   7227     BB = copy0MBB;
   7228 
   7229     // Update machine-CFG edges
   7230     BB->addSuccessor(sinkMBB);
   7231 
   7232     //  sinkMBB:
   7233     //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
   7234     //  ...
   7235     BB = sinkMBB;
   7236     BuildMI(*BB, BB->begin(), dl,
   7237             TII->get(ARM::PHI), MI->getOperand(0).getReg())
   7238       .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB)
   7239       .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
   7240 
   7241     MI->eraseFromParent();   // The pseudo instruction is gone now.
   7242     return BB;
   7243   }
   7244 
   7245   case ARM::BCCi64:
   7246   case ARM::BCCZi64: {
   7247     // If there is an unconditional branch to the other successor, remove it.
   7248     BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end());
   7249 
   7250     // Compare both parts that make up the double comparison separately for
   7251     // equality.
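             // e.g. for an i64 equality test: compare one half, then compare the
             // other half predicated on EQ, so the flags end up EQ only when both
             // halves match; the conditional branch below tests that single flag.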
   7252     bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
   7253 
   7254     unsigned LHS1 = MI->getOperand(1).getReg();
   7255     unsigned LHS2 = MI->getOperand(2).getReg();
   7256     if (RHSisZero) {
   7257       AddDefaultPred(BuildMI(BB, dl,
   7258                              TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   7259                      .addReg(LHS1).addImm(0));
   7260       BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   7261         .addReg(LHS2).addImm(0)
   7262         .addImm(ARMCC::EQ).addReg(ARM::CPSR);
   7263     } else {
   7264       unsigned RHS1 = MI->getOperand(3).getReg();
   7265       unsigned RHS2 = MI->getOperand(4).getReg();
   7266       AddDefaultPred(BuildMI(BB, dl,
   7267                              TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
   7268                      .addReg(LHS1).addReg(RHS1));
   7269       BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
   7270         .addReg(LHS2).addReg(RHS2)
   7271         .addImm(ARMCC::EQ).addReg(ARM::CPSR);
   7272     }
   7273 
   7274     MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
   7275     MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
   7276     if (MI->getOperand(0).getImm() == ARMCC::NE)
   7277       std::swap(destMBB, exitMBB);
   7278 
   7279     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   7280       .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
   7281     if (isThumb2)
   7282       AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB));
   7283     else
   7284       BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB);
   7285 
   7286     MI->eraseFromParent();   // The pseudo instruction is gone now.
   7287     return BB;
   7288   }
   7289 
   7290   case ARM::Int_eh_sjlj_setjmp:
   7291   case ARM::Int_eh_sjlj_setjmp_nofp:
   7292   case ARM::tInt_eh_sjlj_setjmp:
   7293   case ARM::t2Int_eh_sjlj_setjmp:
   7294   case ARM::t2Int_eh_sjlj_setjmp_nofp:
   7295     EmitSjLjDispatchBlock(MI, BB);
   7296     return BB;
   7297 
   7298   case ARM::ABS:
   7299   case ARM::t2ABS: {
   7300     // To insert an ABS instruction, we have to insert the
   7301     // diamond control-flow pattern.  The incoming instruction knows the
   7302     // source vreg to test against 0, the destination vreg to set,
   7303     // the condition code register to branch on, the
   7304     // true/false values to select between, and a branch opcode to use.
   7305     // It transforms
   7306     //     V1 = ABS V0
   7307     // into
   7308     //     V2 = MOVS V0
   7309     //     BCC                      (branch to SinkBB if V0 >= 0)
   7310     //     RSBBB: V3 = RSBri V2, 0  (compute ABS if V2 < 0)
   7311     //     SinkBB: V1 = PHI(V2, V3)
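             // i.e. abs(x) = (x < 0) ? -x : x, with the negate placed in RSBBB so
             // it is only reached (or later predicated) on the negative path.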
   7312     const BasicBlock *LLVM_BB = BB->getBasicBlock();
   7313     MachineFunction::iterator BBI = BB;
   7314     ++BBI;
   7315     MachineFunction *Fn = BB->getParent();
   7316     MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
   7317     MachineBasicBlock *SinkBB  = Fn->CreateMachineBasicBlock(LLVM_BB);
   7318     Fn->insert(BBI, RSBBB);
   7319     Fn->insert(BBI, SinkBB);
   7320 
   7321     unsigned int ABSSrcReg = MI->getOperand(1).getReg();
   7322     unsigned int ABSDstReg = MI->getOperand(0).getReg();
   7323     bool isThumb2 = Subtarget->isThumb2();
   7324     MachineRegisterInfo &MRI = Fn->getRegInfo();
    7325     // In Thumb mode the S bit must not be specified if the source register is
    7326     // SP or PC, nor if the destination register is SP, so restrict the register class.
   7327     unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ?
   7328       (const TargetRegisterClass*)&ARM::rGPRRegClass :
   7329       (const TargetRegisterClass*)&ARM::GPRRegClass);
   7330 
    7331     // Transfer the remainder of BB and its successor edges to SinkBB.
   7332     SinkBB->splice(SinkBB->begin(), BB,
   7333       llvm::next(MachineBasicBlock::iterator(MI)),
   7334       BB->end());
   7335     SinkBB->transferSuccessorsAndUpdatePHIs(BB);
   7336 
   7337     BB->addSuccessor(RSBBB);
   7338     BB->addSuccessor(SinkBB);
   7339 
    7340     // fall through to SinkBB
   7341     RSBBB->addSuccessor(SinkBB);
   7342 
   7343     // insert a cmp at the end of BB
   7344     AddDefaultPred(BuildMI(BB, dl,
   7345                            TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   7346                    .addReg(ABSSrcReg).addImm(0));
   7347 
   7348     // insert a bcc with opposite CC to ARMCC::MI at the end of BB
   7349     BuildMI(BB, dl,
   7350       TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
   7351       .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
   7352 
   7353     // insert rsbri in RSBBB
    7354     // Note: the BCC and rsbri will be converted into a predicated rsbmi
    7355     // by the if-conversion pass.
   7356     BuildMI(*RSBBB, RSBBB->begin(), dl,
   7357       TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
   7358       .addReg(ABSSrcReg, RegState::Kill)
   7359       .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
   7360 
    7361     // insert PHI in SinkBB,
    7362     // reusing ABSDstReg so that uses of the ABS instruction are unchanged
   7363     BuildMI(*SinkBB, SinkBB->begin(), dl,
   7364       TII->get(ARM::PHI), ABSDstReg)
   7365       .addReg(NewRsbDstReg).addMBB(RSBBB)
   7366       .addReg(ABSSrcReg).addMBB(BB);
   7367 
   7368     // remove ABS instruction
   7369     MI->eraseFromParent();
   7370 
   7371     // return last added BB
   7372     return SinkBB;
   7373   }
   7374   case ARM::COPY_STRUCT_BYVAL_I32:
   7375     ++NumLoopByVals;
   7376     return EmitStructByval(MI, BB);
   7377   }
   7378 }
   7379 
   7380 void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
   7381                                                       SDNode *Node) const {
   7382   if (!MI->hasPostISelHook()) {
   7383     assert(!convertAddSubFlagsOpcode(MI->getOpcode()) &&
   7384            "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'");
   7385     return;
   7386   }
   7387 
   7388   const MCInstrDesc *MCID = &MI->getDesc();
    7389   // Adjust instructions that potentially set the 's' bit after isel, i.e. ADC,
    7390   // SBC, RSB, and RSC. Coming out of isel, they have an implicit CPSR def, but
    7391   // the optional operand is still set to noreg. If needed, set the optional
    7392   // operand's register to CPSR, and remove the redundant implicit def.
   7393   //
   7394   // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>).
   7395 
   7396   // Rename pseudo opcodes.
   7397   unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode());
   7398   if (NewOpc) {
   7399     const ARMBaseInstrInfo *TII =
   7400       static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo());
   7401     MCID = &TII->get(NewOpc);
   7402 
   7403     assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 &&
   7404            "converted opcode should be the same except for cc_out");
   7405 
   7406     MI->setDesc(*MCID);
   7407 
   7408     // Add the optional cc_out operand
   7409     MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true));
   7410   }
   7411   unsigned ccOutIdx = MCID->getNumOperands() - 1;
   7412 
   7413   // Any ARM instruction that sets the 's' bit should specify an optional
   7414   // "cc_out" operand in the last operand position.
   7415   if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) {
   7416     assert(!NewOpc && "Optional cc_out operand required");
   7417     return;
   7418   }
   7419   // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it
   7420   // since we already have an optional CPSR def.
   7421   bool definesCPSR = false;
   7422   bool deadCPSR = false;
   7423   for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands();
   7424        i != e; ++i) {
   7425     const MachineOperand &MO = MI->getOperand(i);
   7426     if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) {
   7427       definesCPSR = true;
   7428       if (MO.isDead())
   7429         deadCPSR = true;
   7430       MI->RemoveOperand(i);
   7431       break;
   7432     }
   7433   }
   7434   if (!definesCPSR) {
   7435     assert(!NewOpc && "Optional cc_out operand required");
   7436     return;
   7437   }
   7438   assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag");
   7439   if (deadCPSR) {
   7440     assert(!MI->getOperand(ccOutIdx).getReg() &&
   7441            "expect uninitialized optional cc_out operand");
   7442     return;
   7443   }
   7444 
   7445   // If this instruction was defined with an optional CPSR def and its dag node
   7446   // had a live implicit CPSR def, then activate the optional CPSR def.
   7447   MachineOperand &MO = MI->getOperand(ccOutIdx);
   7448   MO.setReg(ARM::CPSR);
   7449   MO.setIsDef(true);
   7450 }
   7451 
   7452 //===----------------------------------------------------------------------===//
   7453 //                           ARM Optimization Hooks
   7454 //===----------------------------------------------------------------------===//
   7455 
   7456 // Helper function that checks if N is a null or all ones constant.
   7457 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
   7458   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
   7459   if (!C)
   7460     return false;
   7461   return AllOnes ? C->isAllOnesValue() : C->isNullValue();
   7462 }
   7463 
   7464 // Return true if N is conditionally 0 or all ones.
   7465 // Detects these expressions where cc is an i1 value:
   7466 //
   7467 //   (select cc 0, y)   [AllOnes=0]
   7468 //   (select cc y, 0)   [AllOnes=0]
   7469 //   (zext cc)          [AllOnes=0]
   7470 //   (sext cc)          [AllOnes=0/1]
   7471 //   (select cc -1, y)  [AllOnes=1]
   7472 //   (select cc y, -1)  [AllOnes=1]
   7473 //
    7474 // Invert is set when N is the null/all-ones constant in the case where CC is false.
   7475 // OtherOp is set to the alternative value of N.
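         //
         // For example, with AllOnes=0, (select cc, 0, y) yields Invert=false and
         // OtherOp=y, while (select cc, y, 0) yields Invert=true and OtherOp=y.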
   7476 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
   7477                                        SDValue &CC, bool &Invert,
   7478                                        SDValue &OtherOp,
   7479                                        SelectionDAG &DAG) {
   7480   switch (N->getOpcode()) {
   7481   default: return false;
   7482   case ISD::SELECT: {
   7483     CC = N->getOperand(0);
   7484     SDValue N1 = N->getOperand(1);
   7485     SDValue N2 = N->getOperand(2);
   7486     if (isZeroOrAllOnes(N1, AllOnes)) {
   7487       Invert = false;
   7488       OtherOp = N2;
   7489       return true;
   7490     }
   7491     if (isZeroOrAllOnes(N2, AllOnes)) {
   7492       Invert = true;
   7493       OtherOp = N1;
   7494       return true;
   7495     }
   7496     return false;
   7497   }
   7498   case ISD::ZERO_EXTEND:
   7499     // (zext cc) can never be the all ones value.
   7500     if (AllOnes)
   7501       return false;
   7502     // Fall through.
   7503   case ISD::SIGN_EXTEND: {
   7504     EVT VT = N->getValueType(0);
   7505     CC = N->getOperand(0);
   7506     if (CC.getValueType() != MVT::i1)
   7507       return false;
   7508     Invert = !AllOnes;
   7509     if (AllOnes)
   7510       // When looking for an AllOnes constant, N is an sext, and the 'other'
   7511       // value is 0.
   7512       OtherOp = DAG.getConstant(0, VT);
   7513     else if (N->getOpcode() == ISD::ZERO_EXTEND)
   7514       // When looking for a 0 constant, N can be zext or sext.
   7515       OtherOp = DAG.getConstant(1, VT);
   7516     else
   7517       OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
   7518     return true;
   7519   }
   7520   }
   7521 }
   7522 
   7523 // Combine a constant select operand into its use:
   7524 //
    7525 //   (add (select cc, 0, c), x)  -> (select cc, x, (add x, c))
    7526 //   (sub x, (select cc, 0, c))  -> (select cc, x, (sub x, c))
    7527 //   (and (select cc, -1, c), x) -> (select cc, x, (and x, c))  [AllOnes=1]
    7528 //   (or  (select cc, 0, c), x)  -> (select cc, x, (or x, c))
    7529 //   (xor (select cc, 0, c), x)  -> (select cc, x, (xor x, c))
   7530 //
   7531 // The transform is rejected if the select doesn't have a constant operand that
   7532 // is null, or all ones when AllOnes is set.
   7533 //
   7534 // Also recognize sext/zext from i1:
   7535 //
   7536 //   (add (zext cc), x) -> (select cc (add x, 1), x)
   7537 //   (add (sext cc), x) -> (select cc (add x, -1), x)
   7538 //
   7539 // These transformations eventually create predicated instructions.
   7540 //
   7541 // @param N       The node to transform.
   7542 // @param Slct    The N operand that is a select.
   7543 // @param OtherOp The other N operand (x above).
   7544 // @param DCI     Context.
   7545 // @param AllOnes Require the select constant to be all ones instead of null.
   7546 // @returns The new node, or SDValue() on failure.
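         //
         // For example, (add (select cc, 0, 4), x) becomes (select cc, x, (add x, 4)),
         // i.e. the add is only performed on the path where the select would not
         // have produced the identity constant.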
   7547 static
   7548 SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
   7549                             TargetLowering::DAGCombinerInfo &DCI,
   7550                             bool AllOnes = false) {
   7551   SelectionDAG &DAG = DCI.DAG;
   7552   EVT VT = N->getValueType(0);
   7553   SDValue NonConstantVal;
   7554   SDValue CCOp;
   7555   bool SwapSelectOps;
   7556   if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
   7557                                   NonConstantVal, DAG))
   7558     return SDValue();
   7559 
    7560   // Slct is now known to be the desired identity constant when CC is true.
   7561   SDValue TrueVal = OtherOp;
   7562   SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
   7563                                  OtherOp, NonConstantVal);
   7564   // Unless SwapSelectOps says CC should be false.
   7565   if (SwapSelectOps)
   7566     std::swap(TrueVal, FalseVal);
   7567 
   7568   return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
   7569                      CCOp, TrueVal, FalseVal);
   7570 }
   7571 
   7572 // Attempt combineSelectAndUse on each operand of a commutative operator N.
   7573 static
   7574 SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
   7575                                        TargetLowering::DAGCombinerInfo &DCI) {
   7576   SDValue N0 = N->getOperand(0);
   7577   SDValue N1 = N->getOperand(1);
   7578   if (N0.getNode()->hasOneUse()) {
   7579     SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
   7580     if (Result.getNode())
   7581       return Result;
   7582   }
   7583   if (N1.getNode()->hasOneUse()) {
   7584     SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
   7585     if (Result.getNode())
   7586       return Result;
   7587   }
   7588   return SDValue();
   7589 }
   7590 
    7591 // AddCombineToVPADDL - For a pair-wise add on NEON, use the vpaddl instruction
    7592 // (only after legalization).
   7593 static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
   7594                                  TargetLowering::DAGCombinerInfo &DCI,
   7595                                  const ARMSubtarget *Subtarget) {
   7596 
    7597   // Only perform the optimization after legalization, and only if NEON is
    7598   // available. We also expect both operands to be BUILD_VECTORs.
   7599   if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
   7600       || N0.getOpcode() != ISD::BUILD_VECTOR
   7601       || N1.getOpcode() != ISD::BUILD_VECTOR)
   7602     return SDValue();
   7603 
   7604   // Check output type since VPADDL operand elements can only be 8, 16, or 32.
   7605   EVT VT = N->getValueType(0);
   7606   if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
   7607     return SDValue();
   7608 
    7609   // Check that the vector operands are of the right form.
    7610   // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR
    7611   // operands, where N is the number of elements in the formed vector.
    7612   // Each EXTRACT_VECTOR should have the same input vector and an odd or even
    7613   // index such that we have a pairwise add pattern.
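           //
           // For example, with an input vector V, the expected operands are
           //   N0 = BUILD_VECTOR(ext(V, 0), ext(V, 2), ext(V, 4), ...)
           //   N1 = BUILD_VECTOR(ext(V, 1), ext(V, 3), ext(V, 5), ...)
           // which is exactly a pairwise add of adjacent lanes of V.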
   7614 
   7615   // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
   7616   if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
   7617     return SDValue();
   7618   SDValue Vec = N0->getOperand(0)->getOperand(0);
   7619   SDNode *V = Vec.getNode();
   7620   unsigned nextIndex = 0;
   7621 
    7622   // For each operand of the ADD (both are BUILD_VECTORs),
    7623   // check that each of its operands is an EXTRACT_VECTOR with
    7624   // the same input vector and the appropriate index.
   7625   for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
   7626     if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
   7627         && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
   7628 
   7629       SDValue ExtVec0 = N0->getOperand(i);
   7630       SDValue ExtVec1 = N1->getOperand(i);
   7631 
    7632       // The first operand is the vector; verify it is the same.
   7633       if (V != ExtVec0->getOperand(0).getNode() ||
   7634           V != ExtVec1->getOperand(0).getNode())
   7635         return SDValue();
   7636 
    7637       // The second operand is the constant; verify it is correct.
   7638       ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
   7639       ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
   7640 
    7641       // For the constants, N0 should supply the even indices and N1 the odd ones.
   7642       if (!C0 || !C1 || C0->getZExtValue() != nextIndex
   7643           || C1->getZExtValue() != nextIndex+1)
   7644         return SDValue();
   7645 
   7646       // Increment index.
   7647       nextIndex+=2;
   7648     } else
   7649       return SDValue();
   7650   }
   7651 
   7652   // Create VPADDL node.
   7653   SelectionDAG &DAG = DCI.DAG;
   7654   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   7655 
   7656   // Build operand list.
   7657   SmallVector<SDValue, 8> Ops;
   7658   Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
   7659                                 TLI.getPointerTy()));
   7660 
   7661   // Input is the vector.
   7662   Ops.push_back(Vec);
   7663 
   7664   // Get widened type and narrowed type.
   7665   MVT widenType;
   7666   unsigned numElem = VT.getVectorNumElements();
   7667   switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
   7668     case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
   7669     case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
   7670     case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
   7671     default:
   7672       llvm_unreachable("Invalid vector element type for padd optimization.");
   7673   }
   7674 
   7675   SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
   7676                             widenType, &Ops[0], Ops.size());
   7677   return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
   7678 }
   7679 
   7680 static SDValue findMUL_LOHI(SDValue V) {
   7681   if (V->getOpcode() == ISD::UMUL_LOHI ||
   7682       V->getOpcode() == ISD::SMUL_LOHI)
   7683     return V;
   7684   return SDValue();
   7685 }
   7686 
   7687 static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
   7688                                      TargetLowering::DAGCombinerInfo &DCI,
   7689                                      const ARMSubtarget *Subtarget) {
   7690 
   7691   if (Subtarget->isThumb1Only()) return SDValue();
   7692 
   7693   // Only perform the checks after legalize when the pattern is available.
   7694   if (DCI.isBeforeLegalize()) return SDValue();
   7695 
   7696   // Look for multiply add opportunities.
    7697   // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
    7698   // each add node consumes a value from ISD::UMUL_LOHI and there is
    7699   // a glue link from the first add to the second add.
    7700   // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE by
    7701   // an S/UMLAL instruction.
   7702   //          loAdd   UMUL_LOHI
   7703   //            \    / :lo    \ :hi
   7704   //             \  /          \          [no multiline comment]
   7705   //              ADDC         |  hiAdd
   7706   //                 \ :glue  /  /
   7707   //                  \      /  /
   7708   //                    ADDE
   7709   //
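         // In other words, the ADDC/ADDE pair performs a 64-bit add of
         // {hiAdd:loAdd} and the 64-bit product, which is exactly what a single
         // UMLAL/SMLAL computes.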
   7710   assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
   7711   SDValue AddcOp0 = AddcNode->getOperand(0);
   7712   SDValue AddcOp1 = AddcNode->getOperand(1);
   7713 
   7714   // Check if the two operands are from the same mul_lohi node.
   7715   if (AddcOp0.getNode() == AddcOp1.getNode())
   7716     return SDValue();
   7717 
   7718   assert(AddcNode->getNumValues() == 2 &&
   7719          AddcNode->getValueType(0) == MVT::i32 &&
   7720          AddcNode->getValueType(1) == MVT::Glue &&
   7721          "Expect ADDC with two result values: i32, glue");
   7722 
   7723   // Check that the ADDC adds the low result of the S/UMUL_LOHI.
   7724   if (AddcOp0->getOpcode() != ISD::UMUL_LOHI &&
   7725       AddcOp0->getOpcode() != ISD::SMUL_LOHI &&
   7726       AddcOp1->getOpcode() != ISD::UMUL_LOHI &&
   7727       AddcOp1->getOpcode() != ISD::SMUL_LOHI)
   7728     return SDValue();
   7729 
   7730   // Look for the glued ADDE.
   7731   SDNode* AddeNode = AddcNode->getGluedUser();
   7732   if (AddeNode == NULL)
   7733     return SDValue();
   7734 
   7735   // Make sure it is really an ADDE.
   7736   if (AddeNode->getOpcode() != ISD::ADDE)
   7737     return SDValue();
   7738 
   7739   assert(AddeNode->getNumOperands() == 3 &&
   7740          AddeNode->getOperand(2).getValueType() == MVT::Glue &&
   7741          "ADDE node has the wrong inputs");
   7742 
   7743   // Check for the triangle shape.
   7744   SDValue AddeOp0 = AddeNode->getOperand(0);
   7745   SDValue AddeOp1 = AddeNode->getOperand(1);
   7746 
   7747   // Make sure that the ADDE operands are not coming from the same node.
   7748   if (AddeOp0.getNode() == AddeOp1.getNode())
   7749     return SDValue();
   7750 
    7751   // Find the MUL_LOHI node by walking up ADDE's operands.
   7752   bool IsLeftOperandMUL = false;
   7753   SDValue MULOp = findMUL_LOHI(AddeOp0);
   7754   if (MULOp == SDValue())
   7755    MULOp = findMUL_LOHI(AddeOp1);
   7756   else
   7757     IsLeftOperandMUL = true;
   7758   if (MULOp == SDValue())
   7759      return SDValue();
   7760 
   7761   // Figure out the right opcode.
   7762   unsigned Opc = MULOp->getOpcode();
   7763   unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL;
   7764 
   7765   // Figure out the high and low input values to the MLAL node.
   7766   SDValue* HiMul = &MULOp;
   7767   SDValue* HiAdd = NULL;
   7768   SDValue* LoMul = NULL;
   7769   SDValue* LowAdd = NULL;
   7770 
   7771   if (IsLeftOperandMUL)
   7772     HiAdd = &AddeOp1;
   7773   else
   7774     HiAdd = &AddeOp0;
   7775 
   7776 
   7777   if (AddcOp0->getOpcode() == Opc) {
   7778     LoMul = &AddcOp0;
   7779     LowAdd = &AddcOp1;
   7780   }
   7781   if (AddcOp1->getOpcode() == Opc) {
   7782     LoMul = &AddcOp1;
   7783     LowAdd = &AddcOp0;
   7784   }
   7785 
   7786   if (LoMul == NULL)
   7787     return SDValue();
   7788 
   7789   if (LoMul->getNode() != HiMul->getNode())
   7790     return SDValue();
   7791 
   7792   // Create the merged node.
   7793   SelectionDAG &DAG = DCI.DAG;
   7794 
   7795   // Build operand list.
   7796   SmallVector<SDValue, 8> Ops;
   7797   Ops.push_back(LoMul->getOperand(0));
   7798   Ops.push_back(LoMul->getOperand(1));
   7799   Ops.push_back(*LowAdd);
   7800   Ops.push_back(*HiAdd);
   7801 
   7802   SDValue MLALNode =  DAG.getNode(FinalOpc, AddcNode->getDebugLoc(),
   7803                                  DAG.getVTList(MVT::i32, MVT::i32),
   7804                                  &Ops[0], Ops.size());
   7805 
    7806   // Replace the ADD nodes' uses with the MLAL node's values.
   7807   SDValue HiMLALResult(MLALNode.getNode(), 1);
   7808   DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult);
   7809 
   7810   SDValue LoMLALResult(MLALNode.getNode(), 0);
   7811   DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult);
   7812 
   7813   // Return original node to notify the driver to stop replacing.
   7814   SDValue resNode(AddcNode, 0);
   7815   return resNode;
   7816 }
   7817 
   7818 /// PerformADDCCombine - Target-specific dag combine transform from
   7819 /// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL.
   7820 static SDValue PerformADDCCombine(SDNode *N,
   7821                                  TargetLowering::DAGCombinerInfo &DCI,
   7822                                  const ARMSubtarget *Subtarget) {
   7823 
   7824   return AddCombineTo64bitMLAL(N, DCI, Subtarget);
   7825 
   7826 }
   7827 
   7828 /// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
   7829 /// operands N0 and N1.  This is a helper for PerformADDCombine that is
   7830 /// called with the default operands, and if that fails, with commuted
   7831 /// operands.
   7832 static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
   7833                                           TargetLowering::DAGCombinerInfo &DCI,
   7834                                           const ARMSubtarget *Subtarget){
   7835 
   7836   // Attempt to create vpaddl for this add.
   7837   SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
   7838   if (Result.getNode())
   7839     return Result;
   7840 
    7841   // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
   7842   if (N0.getNode()->hasOneUse()) {
   7843     SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
   7844     if (Result.getNode()) return Result;
   7845   }
   7846   return SDValue();
   7847 }
   7848 
   7849 /// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
   7850 ///
   7851 static SDValue PerformADDCombine(SDNode *N,
   7852                                  TargetLowering::DAGCombinerInfo &DCI,
   7853                                  const ARMSubtarget *Subtarget) {
   7854   SDValue N0 = N->getOperand(0);
   7855   SDValue N1 = N->getOperand(1);
   7856 
   7857   // First try with the default operand order.
   7858   SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
   7859   if (Result.getNode())
   7860     return Result;
   7861 
   7862   // If that didn't work, try again with the operands commuted.
   7863   return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
   7864 }
   7865 
   7866 /// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
   7867 ///
   7868 static SDValue PerformSUBCombine(SDNode *N,
   7869                                  TargetLowering::DAGCombinerInfo &DCI) {
   7870   SDValue N0 = N->getOperand(0);
   7871   SDValue N1 = N->getOperand(1);
   7872 
    7873   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
   7874   if (N1.getNode()->hasOneUse()) {
   7875     SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
   7876     if (Result.getNode()) return Result;
   7877   }
   7878 
   7879   return SDValue();
   7880 }
   7881 
   7882 /// PerformVMULCombine
   7883 /// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the
   7884 /// special multiplier accumulator forwarding.
   7885 ///   vmul d3, d0, d2
   7886 ///   vmla d3, d1, d2
   7887 /// is faster than
   7888 ///   vadd d3, d0, d1
   7889 ///   vmul d3, d3, d2
   7890 static SDValue PerformVMULCombine(SDNode *N,
   7891                                   TargetLowering::DAGCombinerInfo &DCI,
   7892                                   const ARMSubtarget *Subtarget) {
   7893   if (!Subtarget->hasVMLxForwarding())
   7894     return SDValue();
   7895 
   7896   SelectionDAG &DAG = DCI.DAG;
   7897   SDValue N0 = N->getOperand(0);
   7898   SDValue N1 = N->getOperand(1);
   7899   unsigned Opcode = N0.getOpcode();
   7900   if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
   7901       Opcode != ISD::FADD && Opcode != ISD::FSUB) {
   7902     Opcode = N1.getOpcode();
   7903     if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
   7904         Opcode != ISD::FADD && Opcode != ISD::FSUB)
   7905       return SDValue();
   7906     std::swap(N0, N1);
   7907   }
   7908 
   7909   EVT VT = N->getValueType(0);
   7910   DebugLoc DL = N->getDebugLoc();
   7911   SDValue N00 = N0->getOperand(0);
   7912   SDValue N01 = N0->getOperand(1);
   7913   return DAG.getNode(Opcode, DL, VT,
   7914                      DAG.getNode(ISD::MUL, DL, VT, N00, N1),
   7915                      DAG.getNode(ISD::MUL, DL, VT, N01, N1));
   7916 }
   7917 
   7918 static SDValue PerformMULCombine(SDNode *N,
   7919                                  TargetLowering::DAGCombinerInfo &DCI,
   7920                                  const ARMSubtarget *Subtarget) {
   7921   SelectionDAG &DAG = DCI.DAG;
   7922 
   7923   if (Subtarget->isThumb1Only())
   7924     return SDValue();
   7925 
   7926   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
   7927     return SDValue();
   7928 
   7929   EVT VT = N->getValueType(0);
   7930   if (VT.is64BitVector() || VT.is128BitVector())
   7931     return PerformVMULCombine(N, DCI, Subtarget);
   7932   if (VT != MVT::i32)
   7933     return SDValue();
   7934 
   7935   ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
   7936   if (!C)
   7937     return SDValue();
   7938 
   7939   int64_t MulAmt = C->getSExtValue();
   7940   unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
   7941 
   7942   ShiftAmt = ShiftAmt & (32 - 1);
   7943   SDValue V = N->getOperand(0);
   7944   DebugLoc DL = N->getDebugLoc();
   7945 
   7946   SDValue Res;
   7947   MulAmt >>= ShiftAmt;
   7948 
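         // The trailing power-of-two factor has been stripped off above; if the
         // remaining multiplier is of the form 2^N+1 or 2^N-1 we can use a shift
         // plus an add/sub, and the stripped factor is reapplied as a final
         // shift below, e.g. x*10 = ((x << 2) + x) << 1.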
   7949   if (MulAmt >= 0) {
   7950     if (isPowerOf2_32(MulAmt - 1)) {
   7951       // (mul x, 2^N + 1) => (add (shl x, N), x)
   7952       Res = DAG.getNode(ISD::ADD, DL, VT,
   7953                         V,
   7954                         DAG.getNode(ISD::SHL, DL, VT,
   7955                                     V,
   7956                                     DAG.getConstant(Log2_32(MulAmt - 1),
   7957                                                     MVT::i32)));
   7958     } else if (isPowerOf2_32(MulAmt + 1)) {
   7959       // (mul x, 2^N - 1) => (sub (shl x, N), x)
   7960       Res = DAG.getNode(ISD::SUB, DL, VT,
   7961                         DAG.getNode(ISD::SHL, DL, VT,
   7962                                     V,
   7963                                     DAG.getConstant(Log2_32(MulAmt + 1),
   7964                                                     MVT::i32)),
   7965                         V);
   7966     } else
   7967       return SDValue();
   7968   } else {
   7969     uint64_t MulAmtAbs = -MulAmt;
   7970     if (isPowerOf2_32(MulAmtAbs + 1)) {
   7971       // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
   7972       Res = DAG.getNode(ISD::SUB, DL, VT,
   7973                         V,
   7974                         DAG.getNode(ISD::SHL, DL, VT,
   7975                                     V,
   7976                                     DAG.getConstant(Log2_32(MulAmtAbs + 1),
   7977                                                     MVT::i32)));
   7978     } else if (isPowerOf2_32(MulAmtAbs - 1)) {
   7979       // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
   7980       Res = DAG.getNode(ISD::ADD, DL, VT,
   7981                         V,
   7982                         DAG.getNode(ISD::SHL, DL, VT,
   7983                                     V,
   7984                                     DAG.getConstant(Log2_32(MulAmtAbs-1),
   7985                                                     MVT::i32)));
   7986       Res = DAG.getNode(ISD::SUB, DL, VT,
   7987                         DAG.getConstant(0, MVT::i32),Res);
   7988 
   7989     } else
   7990       return SDValue();
   7991   }
   7992 
   7993   if (ShiftAmt != 0)
   7994     Res = DAG.getNode(ISD::SHL, DL, VT,
   7995                       Res, DAG.getConstant(ShiftAmt, MVT::i32));
   7996 
   7997   // Do not add new nodes to DAG combiner worklist.
   7998   DCI.CombineTo(N, Res, false);
   7999   return SDValue();
   8000 }
   8001 
   8002 static SDValue PerformANDCombine(SDNode *N,
   8003                                  TargetLowering::DAGCombinerInfo &DCI,
   8004                                  const ARMSubtarget *Subtarget) {
   8005 
   8006   // Attempt to use immediate-form VBIC
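         // (and x, C) is equivalent to VBIC x, ~C, so this fires when the
         // complemented splat constant is encodable as a NEON modified immediate,
         // e.g. an AND with a splat of 0xffffff00 becomes a VBIC with
         // immediate 0xff.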
   8007   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
   8008   DebugLoc dl = N->getDebugLoc();
   8009   EVT VT = N->getValueType(0);
   8010   SelectionDAG &DAG = DCI.DAG;
   8011 
   8012   if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
   8013     return SDValue();
   8014 
   8015   APInt SplatBits, SplatUndef;
   8016   unsigned SplatBitSize;
   8017   bool HasAnyUndefs;
   8018   if (BVN &&
   8019       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
   8020     if (SplatBitSize <= 64) {
   8021       EVT VbicVT;
   8022       SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(),
   8023                                       SplatUndef.getZExtValue(), SplatBitSize,
   8024                                       DAG, VbicVT, VT.is128BitVector(),
   8025                                       OtherModImm);
   8026       if (Val.getNode()) {
   8027         SDValue Input =
   8028           DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0));
   8029         SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val);
   8030         return DAG.getNode(ISD::BITCAST, dl, VT, Vbic);
   8031       }
   8032     }
   8033   }
   8034 
   8035   if (!Subtarget->isThumb1Only()) {
    8036     // fold (and (select cc, -1, c), x) -> (select cc, x, (and x, c))
   8037     SDValue Result = combineSelectAndUseCommutative(N, true, DCI);
   8038     if (Result.getNode())
   8039       return Result;
   8040   }
   8041 
   8042   return SDValue();
   8043 }
   8044 
   8045 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
   8046 static SDValue PerformORCombine(SDNode *N,
   8047                                 TargetLowering::DAGCombinerInfo &DCI,
   8048                                 const ARMSubtarget *Subtarget) {
   8049   // Attempt to use immediate-form VORR
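         // This mirrors the VBIC case in PerformANDCombine: (or x, C) becomes a
         // VORR with immediate C when the splatted constant is encodable as a
         // NEON modified immediate.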
   8050   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
   8051   DebugLoc dl = N->getDebugLoc();
   8052   EVT VT = N->getValueType(0);
   8053   SelectionDAG &DAG = DCI.DAG;
   8054 
   8055   if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
   8056     return SDValue();
   8057 
   8058   APInt SplatBits, SplatUndef;
   8059   unsigned SplatBitSize;
   8060   bool HasAnyUndefs;
   8061   if (BVN && Subtarget->hasNEON() &&
   8062       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
   8063     if (SplatBitSize <= 64) {
   8064       EVT VorrVT;
   8065       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
   8066                                       SplatUndef.getZExtValue(), SplatBitSize,
   8067                                       DAG, VorrVT, VT.is128BitVector(),
   8068                                       OtherModImm);
   8069       if (Val.getNode()) {
   8070         SDValue Input =
   8071           DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0));
   8072         SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val);
   8073         return DAG.getNode(ISD::BITCAST, dl, VT, Vorr);
   8074       }
   8075     }
   8076   }
   8077 
   8078   if (!Subtarget->isThumb1Only()) {
    8079     // fold (or (select cc, 0, c), x) -> (select cc, x, (or x, c))
   8080     SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
   8081     if (Result.getNode())
   8082       return Result;
   8083   }
   8084 
   8085   // The code below optimizes (or (and X, Y), Z).
   8086   // The AND operand needs to have a single user to make these optimizations
   8087   // profitable.
   8088   SDValue N0 = N->getOperand(0);
   8089   if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
   8090     return SDValue();
   8091   SDValue N1 = N->getOperand(1);
   8092 
   8093   // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
   8094   if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() &&
   8095       DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
   8096     APInt SplatUndef;
   8097     unsigned SplatBitSize;
   8098     bool HasAnyUndefs;
   8099 
   8100     BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
   8101     APInt SplatBits0;
   8102     if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
   8103                                   HasAnyUndefs) && !HasAnyUndefs) {
   8104       BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
   8105       APInt SplatBits1;
   8106       if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
   8107                                     HasAnyUndefs) && !HasAnyUndefs &&
   8108           SplatBits0 == ~SplatBits1) {
   8109         // Canonicalize the vector type to make instruction selection simpler.
   8110         EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
   8111         SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
   8112                                      N0->getOperand(1), N0->getOperand(0),
   8113                                      N1->getOperand(0));
   8114         return DAG.getNode(ISD::BITCAST, dl, VT, Result);
   8115       }
   8116     }
   8117   }
   8118 
   8119   // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
   8120   // reasonable.
   8121 
   8122   // BFI is only available on V6T2+
   8123   if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
   8124     return SDValue();
   8125 
   8126   DebugLoc DL = N->getDebugLoc();
   8127   // 1) or (and A, mask), val => ARMbfi A, val, mask
   8128   //      iff (val & mask) == val
   8129   //
   8130   // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
   8131   //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
   8132   //          && mask == ~mask2
   8133   //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
   8134   //          && ~mask == mask2
   8135   //  (i.e., copy a bitfield value into another bitfield of the same width)
   8136 
   8137   if (VT != MVT::i32)
   8138     return SDValue();
   8139 
   8140   SDValue N00 = N0.getOperand(0);
   8141 
   8142   // The value and the mask need to be constants so we can verify this is
   8143   // actually a bitfield set. If the mask is 0xffff, we can do better
   8144   // via a movt instruction, so don't use BFI in that case.
   8145   SDValue MaskOp = N0.getOperand(1);
   8146   ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp);
   8147   if (!MaskC)
   8148     return SDValue();
   8149   unsigned Mask = MaskC->getZExtValue();
   8150   if (Mask == 0xffff)
   8151     return SDValue();
   8152   SDValue Res;
   8153   // Case (1): or (and A, mask), val => ARMbfi A, val, mask
   8154   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   8155   if (N1C) {
   8156     unsigned Val = N1C->getZExtValue();
   8157     if ((Val & ~Mask) != Val)
   8158       return SDValue();
   8159 
   8160     if (ARM::isBitFieldInvertedMask(Mask)) {
   8161       Val >>= CountTrailingZeros_32(~Mask);
   8162 
   8163       Res = DAG.getNode(ARMISD::BFI, DL, VT, N00,
   8164                         DAG.getConstant(Val, MVT::i32),
   8165                         DAG.getConstant(Mask, MVT::i32));
   8166 
   8167       // Do not add new nodes to DAG combiner worklist.
   8168       DCI.CombineTo(N, Res, false);
   8169       return SDValue();
   8170     }
   8171   } else if (N1.getOpcode() == ISD::AND) {
   8172     // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
   8173     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   8174     if (!N11C)
   8175       return SDValue();
   8176     unsigned Mask2 = N11C->getZExtValue();
   8177 
    8178     // Mask and ~Mask2 (or the reverse) must be equivalent for the BFI pattern
    8179     // to match as-is.
   8180     if (ARM::isBitFieldInvertedMask(Mask) &&
   8181         (Mask == ~Mask2)) {
   8182       // The pack halfword instruction works better for masks that fit it,
   8183       // so use that when it's available.
   8184       if (Subtarget->hasT2ExtractPack() &&
   8185           (Mask == 0xffff || Mask == 0xffff0000))
   8186         return SDValue();
   8187       // 2a
   8188       unsigned amt = CountTrailingZeros_32(Mask2);
   8189       Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
   8190                         DAG.getConstant(amt, MVT::i32));
   8191       Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res,
   8192                         DAG.getConstant(Mask, MVT::i32));
   8193       // Do not add new nodes to DAG combiner worklist.
   8194       DCI.CombineTo(N, Res, false);
   8195       return SDValue();
   8196     } else if (ARM::isBitFieldInvertedMask(~Mask) &&
   8197                (~Mask == Mask2)) {
   8198       // The pack halfword instruction works better for masks that fit it,
   8199       // so use that when it's available.
   8200       if (Subtarget->hasT2ExtractPack() &&
   8201           (Mask2 == 0xffff || Mask2 == 0xffff0000))
   8202         return SDValue();
   8203       // 2b
   8204       unsigned lsb = CountTrailingZeros_32(Mask);
   8205       Res = DAG.getNode(ISD::SRL, DL, VT, N00,
   8206                         DAG.getConstant(lsb, MVT::i32));
   8207       Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
   8208                         DAG.getConstant(Mask2, MVT::i32));
   8209       // Do not add new nodes to DAG combiner worklist.
   8210       DCI.CombineTo(N, Res, false);
   8211       return SDValue();
   8212     }
   8213   }
   8214 
   8215   if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) &&
   8216       N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) &&
   8217       ARM::isBitFieldInvertedMask(~Mask)) {
   8218     // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask
   8219     // where lsb(mask) == #shamt and masked bits of B are known zero.
   8220     SDValue ShAmt = N00.getOperand(1);
   8221     unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue();
   8222     unsigned LSB = CountTrailingZeros_32(Mask);
   8223     if (ShAmtC != LSB)
   8224       return SDValue();
   8225 
   8226     Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0),
   8227                       DAG.getConstant(~Mask, MVT::i32));
   8228 
   8229     // Do not add new nodes to DAG combiner worklist.
   8230     DCI.CombineTo(N, Res, false);
   8231   }
   8232 
   8233   return SDValue();
   8234 }
   8235 
   8236 static SDValue PerformXORCombine(SDNode *N,
   8237                                  TargetLowering::DAGCombinerInfo &DCI,
   8238                                  const ARMSubtarget *Subtarget) {
   8239   EVT VT = N->getValueType(0);
   8240   SelectionDAG &DAG = DCI.DAG;
   8241 
   8242   if(!DAG.getTargetLoweringInfo().isTypeLegal(VT))
   8243     return SDValue();
   8244 
   8245   if (!Subtarget->isThumb1Only()) {
    8246     // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor x, c))
   8247     SDValue Result = combineSelectAndUseCommutative(N, false, DCI);
   8248     if (Result.getNode())
   8249       return Result;
   8250   }
   8251 
   8252   return SDValue();
   8253 }
   8254 
   8255 /// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
   8256 /// the bits being cleared by the AND are not demanded by the BFI.
   8257 static SDValue PerformBFICombine(SDNode *N,
   8258                                  TargetLowering::DAGCombinerInfo &DCI) {
   8259   SDValue N1 = N->getOperand(1);
   8260   if (N1.getOpcode() == ISD::AND) {
   8261     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   8262     if (!N11C)
   8263       return SDValue();
   8264     unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
   8265     unsigned LSB = CountTrailingZeros_32(~InvMask);
   8266     unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
   8267     unsigned Mask = (1 << Width)-1;
   8268     unsigned Mask2 = N11C->getZExtValue();
   8269     if ((Mask & (~Mask2)) == 0)
   8270       return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
   8271                              N->getOperand(0), N1.getOperand(0),
   8272                              N->getOperand(2));
   8273   }
   8274   return SDValue();
   8275 }
   8276 
   8277 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
   8278 /// ARMISD::VMOVRRD.
   8279 static SDValue PerformVMOVRRDCombine(SDNode *N,
   8280                                      TargetLowering::DAGCombinerInfo &DCI) {
   8281   // vmovrrd(vmovdrr x, y) -> x,y
   8282   SDValue InDouble = N->getOperand(0);
   8283   if (InDouble.getOpcode() == ARMISD::VMOVDRR)
   8284     return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
   8285 
   8286   // vmovrrd(load f64) -> (load i32), (load i32)
   8287   SDNode *InNode = InDouble.getNode();
   8288   if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() &&
   8289       InNode->getValueType(0) == MVT::f64 &&
   8290       InNode->getOperand(1).getOpcode() == ISD::FrameIndex &&
   8291       !cast<LoadSDNode>(InNode)->isVolatile()) {
   8292     // TODO: Should this be done for non-FrameIndex operands?
   8293     LoadSDNode *LD = cast<LoadSDNode>(InNode);
   8294 
   8295     SelectionDAG &DAG = DCI.DAG;
   8296     DebugLoc DL = LD->getDebugLoc();
   8297     SDValue BasePtr = LD->getBasePtr();
   8298     SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr,
   8299                                  LD->getPointerInfo(), LD->isVolatile(),
   8300                                  LD->isNonTemporal(), LD->isInvariant(),
   8301                                  LD->getAlignment());
   8302 
   8303     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
   8304                                     DAG.getConstant(4, MVT::i32));
   8305     SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr,
   8306                                  LD->getPointerInfo(), LD->isVolatile(),
   8307                                  LD->isNonTemporal(), LD->isInvariant(),
   8308                                  std::min(4U, LD->getAlignment() / 2));
   8309 
   8310     DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1));
   8311     SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2);
   8312     DCI.RemoveFromWorklist(LD);
   8313     DAG.DeleteNode(LD);
   8314     return Result;
   8315   }
   8316 
   8317   return SDValue();
   8318 }
   8319 
   8320 /// PerformVMOVDRRCombine - Target-specific dag combine xforms for
   8321 /// ARMISD::VMOVDRR.  This is also used for BUILD_VECTORs with 2 operands.
   8322 static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) {
   8323   // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X)
   8324   SDValue Op0 = N->getOperand(0);
   8325   SDValue Op1 = N->getOperand(1);
   8326   if (Op0.getOpcode() == ISD::BITCAST)
   8327     Op0 = Op0.getOperand(0);
   8328   if (Op1.getOpcode() == ISD::BITCAST)
   8329     Op1 = Op1.getOperand(0);
   8330   if (Op0.getOpcode() == ARMISD::VMOVRRD &&
   8331       Op0.getNode() == Op1.getNode() &&
   8332       Op0.getResNo() == 0 && Op1.getResNo() == 1)
   8333     return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
   8334                        N->getValueType(0), Op0.getOperand(0));
   8335   return SDValue();
   8336 }
   8337 
   8338 /// PerformSTORECombine - Target-specific dag combine xforms for
   8339 /// ISD::STORE.
   8340 static SDValue PerformSTORECombine(SDNode *N,
   8341                                    TargetLowering::DAGCombinerInfo &DCI) {
   8342   StoreSDNode *St = cast<StoreSDNode>(N);
   8343   if (St->isVolatile())
   8344     return SDValue();
   8345 
   8346   // Optimize trunc store (of multiple scalars) to shuffle and store.  First,
   8347   // pack all of the elements in one place.  Next, store to memory in fewer
   8348   // chunks.
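           // For example (hypothetical types): truncating a v4i32 store to v4i8
           // becomes a v16i8 shuffle that packs the four bytes into the low lanes,
           // followed by a single i32 store of the packed result.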
   8349   SDValue StVal = St->getValue();
   8350   EVT VT = StVal.getValueType();
   8351   if (St->isTruncatingStore() && VT.isVector()) {
   8352     SelectionDAG &DAG = DCI.DAG;
   8353     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   8354     EVT StVT = St->getMemoryVT();
   8355     unsigned NumElems = VT.getVectorNumElements();
   8356     assert(StVT != VT && "Cannot truncate to the same type");
   8357     unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
   8358     unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
   8359 
    8360     // The From/To element sizes and the element count must all be powers of two.
   8361     if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
   8362 
    8363     // We will reuse the original (wider) vector elements for the stores, so
    8364     // the total source size must be a multiple of the truncated element size.
   8365     if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue();
   8366 
   8367     unsigned SizeRatio  = FromEltSz / ToEltSz;
   8368     assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits());
   8369 
   8370     // Create a type on which we perform the shuffle.
   8371     EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(),
   8372                                      NumElems*SizeRatio);
   8373     assert(WideVecVT.getSizeInBits() == VT.getSizeInBits());
   8374 
   8375     DebugLoc DL = St->getDebugLoc();
   8376     SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal);
   8377     SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1);
   8378     for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio;
   8379 
   8380     // Can't shuffle using an illegal type.
   8381     if (!TLI.isTypeLegal(WideVecVT)) return SDValue();
   8382 
   8383     SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec,
   8384                                 DAG.getUNDEF(WideVec.getValueType()),
   8385                                 ShuffleVec.data());
   8386     // At this point all of the data is stored at the bottom of the
   8387     // register. We now need to save it to mem.
   8388 
   8389     // Find the largest store unit
   8390     MVT StoreType = MVT::i8;
   8391     for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE;
   8392          tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) {
   8393       MVT Tp = (MVT::SimpleValueType)tp;
   8394       if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz)
   8395         StoreType = Tp;
   8396     }
   8397     // Didn't find a legal store type.
   8398     if (!TLI.isTypeLegal(StoreType))
   8399       return SDValue();
   8400 
   8401     // Bitcast the original vector into a vector of store-size units
   8402     EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(),
   8403             StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits());
   8404     assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits());
   8405     SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff);
   8406     SmallVector<SDValue, 8> Chains;
   8407     SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8,
   8408                                         TLI.getPointerTy());
   8409     SDValue BasePtr = St->getBasePtr();
   8410 
   8411     // Perform one or more big stores into memory.
   8412     unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits();
   8413     for (unsigned I = 0; I < E; I++) {
   8414       SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
   8415                                    StoreType, ShuffWide,
   8416                                    DAG.getIntPtrConstant(I));
   8417       SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr,
   8418                                 St->getPointerInfo(), St->isVolatile(),
   8419                                 St->isNonTemporal(), St->getAlignment());
   8420       BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
   8421                             Increment);
   8422       Chains.push_back(Ch);
   8423     }
   8424     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0],
   8425                        Chains.size());
   8426   }
   8427 
   8428   if (!ISD::isNormalStore(St))
   8429     return SDValue();
   8430 
   8431   // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and
   8432   // ARM stores of arguments in the same cache line.
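           // For example (a sketch): (store (vmovdrr Rlo, Rhi), addr) becomes
           // (store Rlo, addr) followed by (store Rhi, addr+4), keeping both halves
           // in plain GPR stores.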
   8433   if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR &&
   8434       StVal.getNode()->hasOneUse()) {
   8435     SelectionDAG  &DAG = DCI.DAG;
   8436     DebugLoc DL = St->getDebugLoc();
   8437     SDValue BasePtr = St->getBasePtr();
   8438     SDValue NewST1 = DAG.getStore(St->getChain(), DL,
   8439                                   StVal.getNode()->getOperand(0), BasePtr,
   8440                                   St->getPointerInfo(), St->isVolatile(),
   8441                                   St->isNonTemporal(), St->getAlignment());
   8442 
   8443     SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr,
   8444                                     DAG.getConstant(4, MVT::i32));
   8445     return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1),
   8446                         OffsetPtr, St->getPointerInfo(), St->isVolatile(),
   8447                         St->isNonTemporal(),
   8448                         std::min(4U, St->getAlignment() / 2));
   8449   }
   8450 
   8451   if (StVal.getValueType() != MVT::i64 ||
   8452       StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
   8453     return SDValue();
   8454 
   8455   // Bitcast an i64 store extracted from a vector to f64.
   8456   // Otherwise, the i64 value will be legalized to a pair of i32 values.
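           // For example (a sketch): (store (i64 extractelt (v2i64 V), n), addr) is
           // rewritten through the f64 domain as
           //   (store (i64 bitcast (f64 extractelt (v2f64 bitcast V), n)), addr)
           // and the bitcasts then fold away, so the element is stored from a D
           // register instead of a GPR pair.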
   8457   SelectionDAG &DAG = DCI.DAG;
   8458   DebugLoc dl = StVal.getDebugLoc();
   8459   SDValue IntVec = StVal.getOperand(0);
   8460   EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
   8461                                  IntVec.getValueType().getVectorNumElements());
   8462   SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec);
   8463   SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
   8464                                Vec, StVal.getOperand(1));
   8465   dl = N->getDebugLoc();
   8466   SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt);
   8467   // Make the DAGCombiner fold the bitcasts.
   8468   DCI.AddToWorklist(Vec.getNode());
   8469   DCI.AddToWorklist(ExtElt.getNode());
   8470   DCI.AddToWorklist(V.getNode());
   8471   return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(),
   8472                       St->getPointerInfo(), St->isVolatile(),
   8473                       St->isNonTemporal(), St->getAlignment(),
   8474                       St->getTBAAInfo());
   8475 }
   8476 
   8477 /// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node
   8478 /// are normal, non-volatile loads.  If so, it is profitable to bitcast an
   8479 /// i64 vector to have f64 elements, since the value can then be loaded
   8480 /// directly into a VFP register.
   8481 static bool hasNormalLoadOperand(SDNode *N) {
   8482   unsigned NumElts = N->getValueType(0).getVectorNumElements();
   8483   for (unsigned i = 0; i < NumElts; ++i) {
   8484     SDNode *Elt = N->getOperand(i).getNode();
   8485     if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile())
   8486       return true;
   8487   }
   8488   return false;
   8489 }
   8490 
   8491 /// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for
   8492 /// ISD::BUILD_VECTOR.
   8493 static SDValue PerformBUILD_VECTORCombine(SDNode *N,
   8494                                           TargetLowering::DAGCombinerInfo &DCI){
   8495   // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X):
   8496   // VMOVRRD is introduced when legalizing i64 types.  It forces the i64 value
   8497   // into a pair of GPRs, which is fine when the value is used as a scalar,
   8498   // but if the i64 value is converted to a vector, we need to undo the VMOVRRD.
   8499   SelectionDAG &DAG = DCI.DAG;
   8500   if (N->getNumOperands() == 2) {
   8501     SDValue RV = PerformVMOVDRRCombine(N, DAG);
   8502     if (RV.getNode())
   8503       return RV;
   8504   }
   8505 
   8506   // Load i64 elements as f64 values so that type legalization does not split
   8507   // them up into i32 values.
   8508   EVT VT = N->getValueType(0);
   8509   if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N))
   8510     return SDValue();
   8511   DebugLoc dl = N->getDebugLoc();
   8512   SmallVector<SDValue, 8> Ops;
   8513   unsigned NumElts = VT.getVectorNumElements();
   8514   for (unsigned i = 0; i < NumElts; ++i) {
   8515     SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i));
   8516     Ops.push_back(V);
   8517     // Make the DAGCombiner fold the bitcast.
   8518     DCI.AddToWorklist(V.getNode());
   8519   }
   8520   EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts);
   8521   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts);
   8522   return DAG.getNode(ISD::BITCAST, dl, VT, BV);
   8523 }
   8524 
   8525 /// PerformInsertEltCombine - Target-specific dag combine xforms for
   8526 /// ISD::INSERT_VECTOR_ELT.
   8527 static SDValue PerformInsertEltCombine(SDNode *N,
   8528                                        TargetLowering::DAGCombinerInfo &DCI) {
   8529   // Bitcast an i64 load inserted into a vector to f64.
   8530   // Otherwise, the i64 value will be legalized to a pair of i32 values.
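           // For example (a sketch): (insertelt (v2i64 V), (i64 load [p]), n) becomes
           //   (bitcast (insertelt (v2f64 bitcast V), (f64 bitcast (i64 load [p])), n))
           // and the bitcast around the load folds into an f64 load.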
   8531   EVT VT = N->getValueType(0);
   8532   SDNode *Elt = N->getOperand(1).getNode();
   8533   if (VT.getVectorElementType() != MVT::i64 ||
   8534       !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile())
   8535     return SDValue();
   8536 
   8537   SelectionDAG &DAG = DCI.DAG;
   8538   DebugLoc dl = N->getDebugLoc();
   8539   EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64,
   8540                                  VT.getVectorNumElements());
   8541   SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0));
   8542   SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1));
   8543   // Make the DAGCombiner fold the bitcasts.
   8544   DCI.AddToWorklist(Vec.getNode());
   8545   DCI.AddToWorklist(V.getNode());
   8546   SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT,
   8547                                Vec, V, N->getOperand(2));
   8548   return DAG.getNode(ISD::BITCAST, dl, VT, InsElt);
   8549 }
   8550 
   8551 /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for
   8552 /// ISD::VECTOR_SHUFFLE.
   8553 static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) {
   8554   // The LLVM shufflevector instruction does not require the shuffle mask
   8555   // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does
   8556   // have that requirement.  When translating to ISD::VECTOR_SHUFFLE, if the
   8557   // operands do not match the mask length, they are extended by concatenating
   8558   // them with undef vectors.  That is probably the right thing for other
   8559   // targets, but for NEON it is better to concatenate two double-register
   8560   // size vector operands into a single quad-register size vector.  Do that
   8561   // transformation here:
   8562   //   shuffle(concat(v1, undef), concat(v2, undef)) ->
   8563   //   shuffle(concat(v1, v2), undef)
   8564   SDValue Op0 = N->getOperand(0);
   8565   SDValue Op1 = N->getOperand(1);
   8566   if (Op0.getOpcode() != ISD::CONCAT_VECTORS ||
   8567       Op1.getOpcode() != ISD::CONCAT_VECTORS ||
   8568       Op0.getNumOperands() != 2 ||
   8569       Op1.getNumOperands() != 2)
   8570     return SDValue();
   8571   SDValue Concat0Op1 = Op0.getOperand(1);
   8572   SDValue Concat1Op1 = Op1.getOperand(1);
   8573   if (Concat0Op1.getOpcode() != ISD::UNDEF ||
   8574       Concat1Op1.getOpcode() != ISD::UNDEF)
   8575     return SDValue();
   8576   // Skip the transformation if any of the types are illegal.
   8577   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   8578   EVT VT = N->getValueType(0);
   8579   if (!TLI.isTypeLegal(VT) ||
   8580       !TLI.isTypeLegal(Concat0Op1.getValueType()) ||
   8581       !TLI.isTypeLegal(Concat1Op1.getValueType()))
   8582     return SDValue();
   8583 
   8584   SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
   8585                                   Op0.getOperand(0), Op1.getOperand(0));
   8586   // Translate the shuffle mask.
   8587   SmallVector<int, 16> NewMask;
   8588   unsigned NumElts = VT.getVectorNumElements();
   8589   unsigned HalfElts = NumElts/2;
   8590   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   8591   for (unsigned n = 0; n < NumElts; ++n) {
   8592     int MaskElt = SVN->getMaskElt(n);
   8593     int NewElt = -1;
   8594     if (MaskElt < (int)HalfElts)
   8595       NewElt = MaskElt;
   8596     else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts))
   8597       NewElt = HalfElts + MaskElt - NumElts;
   8598     NewMask.push_back(NewElt);
   8599   }
   8600   return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat,
   8601                               DAG.getUNDEF(VT), NewMask.data());
   8602 }
   8603 
   8604 /// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and
   8605 /// NEON load/store intrinsics to merge base address updates.
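         /// For example (a sketch): when the address of a vld1 intrinsic is also used
         /// by an ADD of the access size, e.g.
         ///   %v = vld1(%p);  %p.next = add %p, 16
         /// the pair is replaced by a single VLD1_UPD node that also produces the
         /// incremented address, which selects to a post-indexed "vld1 ..., [rN]!".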
   8606 static SDValue CombineBaseUpdate(SDNode *N,
   8607                                  TargetLowering::DAGCombinerInfo &DCI) {
   8608   if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
   8609     return SDValue();
   8610 
   8611   SelectionDAG &DAG = DCI.DAG;
   8612   bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID ||
   8613                       N->getOpcode() == ISD::INTRINSIC_W_CHAIN);
   8614   unsigned AddrOpIdx = (isIntrinsic ? 2 : 1);
   8615   SDValue Addr = N->getOperand(AddrOpIdx);
   8616 
   8617   // Search for a use of the address operand that is an increment.
   8618   for (SDNode::use_iterator UI = Addr.getNode()->use_begin(),
   8619          UE = Addr.getNode()->use_end(); UI != UE; ++UI) {
   8620     SDNode *User = *UI;
   8621     if (User->getOpcode() != ISD::ADD ||
   8622         UI.getUse().getResNo() != Addr.getResNo())
   8623       continue;
   8624 
   8625     // Check that the add is independent of the load/store.  Otherwise, folding
   8626     // it would create a cycle.
   8627     if (User->isPredecessorOf(N) || N->isPredecessorOf(User))
   8628       continue;
   8629 
   8630     // Find the new opcode for the updating load/store.
   8631     bool isLoad = true;
   8632     bool isLaneOp = false;
   8633     unsigned NewOpc = 0;
   8634     unsigned NumVecs = 0;
   8635     if (isIntrinsic) {
   8636       unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   8637       switch (IntNo) {
   8638       default: llvm_unreachable("unexpected intrinsic for Neon base update");
   8639       case Intrinsic::arm_neon_vld1:     NewOpc = ARMISD::VLD1_UPD;
   8640         NumVecs = 1; break;
   8641       case Intrinsic::arm_neon_vld2:     NewOpc = ARMISD::VLD2_UPD;
   8642         NumVecs = 2; break;
   8643       case Intrinsic::arm_neon_vld3:     NewOpc = ARMISD::VLD3_UPD;
   8644         NumVecs = 3; break;
   8645       case Intrinsic::arm_neon_vld4:     NewOpc = ARMISD::VLD4_UPD;
   8646         NumVecs = 4; break;
   8647       case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD;
   8648         NumVecs = 2; isLaneOp = true; break;
   8649       case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD;
   8650         NumVecs = 3; isLaneOp = true; break;
   8651       case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD;
   8652         NumVecs = 4; isLaneOp = true; break;
   8653       case Intrinsic::arm_neon_vst1:     NewOpc = ARMISD::VST1_UPD;
   8654         NumVecs = 1; isLoad = false; break;
   8655       case Intrinsic::arm_neon_vst2:     NewOpc = ARMISD::VST2_UPD;
   8656         NumVecs = 2; isLoad = false; break;
   8657       case Intrinsic::arm_neon_vst3:     NewOpc = ARMISD::VST3_UPD;
   8658         NumVecs = 3; isLoad = false; break;
   8659       case Intrinsic::arm_neon_vst4:     NewOpc = ARMISD::VST4_UPD;
   8660         NumVecs = 4; isLoad = false; break;
   8661       case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD;
   8662         NumVecs = 2; isLoad = false; isLaneOp = true; break;
   8663       case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD;
   8664         NumVecs = 3; isLoad = false; isLaneOp = true; break;
   8665       case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD;
   8666         NumVecs = 4; isLoad = false; isLaneOp = true; break;
   8667       }
   8668     } else {
   8669       isLaneOp = true;
   8670       switch (N->getOpcode()) {
   8671       default: llvm_unreachable("unexpected opcode for Neon base update");
   8672       case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
   8673       case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
   8674       case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
   8675       }
   8676     }
   8677 
   8678     // Find the size of memory referenced by the load/store.
   8679     EVT VecTy;
   8680     if (isLoad)
   8681       VecTy = N->getValueType(0);
   8682     else
   8683       VecTy = N->getOperand(AddrOpIdx+1).getValueType();
   8684     unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8;
   8685     if (isLaneOp)
   8686       NumBytes /= VecTy.getVectorNumElements();
   8687 
   8688     // If the increment is a constant, it must match the memory ref size.
   8689     SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0);
   8690     if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) {
   8691       uint64_t IncVal = CInc->getZExtValue();
   8692       if (IncVal != NumBytes)
   8693         continue;
   8694     } else if (NumBytes >= 3 * 16) {
   8695       // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two
   8696       // separate instructions that make it harder to use a non-constant update.
   8697       continue;
   8698     }
   8699 
   8700     // Create the new updating load/store node.
   8701     EVT Tys[6];
   8702     unsigned NumResultVecs = (isLoad ? NumVecs : 0);
   8703     unsigned n;
   8704     for (n = 0; n < NumResultVecs; ++n)
   8705       Tys[n] = VecTy;
   8706     Tys[n++] = MVT::i32;
   8707     Tys[n] = MVT::Other;
   8708     SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2);
   8709     SmallVector<SDValue, 8> Ops;
   8710     Ops.push_back(N->getOperand(0)); // incoming chain
   8711     Ops.push_back(N->getOperand(AddrOpIdx));
   8712     Ops.push_back(Inc);
   8713     for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
   8714       Ops.push_back(N->getOperand(i));
   8715     }
   8716     MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
   8717     SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys,
   8718                                            Ops.data(), Ops.size(),
   8719                                            MemInt->getMemoryVT(),
   8720                                            MemInt->getMemOperand());
   8721 
   8722     // Update the uses.
   8723     std::vector<SDValue> NewResults;
   8724     for (unsigned i = 0; i < NumResultVecs; ++i) {
   8725       NewResults.push_back(SDValue(UpdN.getNode(), i));
   8726     }
   8727     NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain
   8728     DCI.CombineTo(N, NewResults);
   8729     DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
   8730 
   8731     break;
   8732   }
   8733   return SDValue();
   8734 }
   8735 
   8736 /// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a
   8737 /// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic
   8738 /// are also VDUPLANEs.  If so, combine them to a vldN-dup operation and
   8739 /// return true.
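         /// For example (a sketch): if every value produced by a vld2lane intrinsic is
         /// only used by VDUPLANE nodes for the loaded lane, the whole group becomes a
         /// single VLD2DUP, i.e. roughly "vld2.32 {d16[], d17[]}, [rN]".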
   8740 static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
   8741   SelectionDAG &DAG = DCI.DAG;
   8742   EVT VT = N->getValueType(0);
   8743   // vldN-dup instructions only support 64-bit vectors for N > 1.
   8744   if (!VT.is64BitVector())
   8745     return false;
   8746 
   8747   // Check if the VDUPLANE operand is a vldN-dup intrinsic.
   8748   SDNode *VLD = N->getOperand(0).getNode();
   8749   if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
   8750     return false;
   8751   unsigned NumVecs = 0;
   8752   unsigned NewOpc = 0;
   8753   unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
   8754   if (IntNo == Intrinsic::arm_neon_vld2lane) {
   8755     NumVecs = 2;
   8756     NewOpc = ARMISD::VLD2DUP;
   8757   } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
   8758     NumVecs = 3;
   8759     NewOpc = ARMISD::VLD3DUP;
   8760   } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
   8761     NumVecs = 4;
   8762     NewOpc = ARMISD::VLD4DUP;
   8763   } else {
   8764     return false;
   8765   }
   8766 
   8767   // First check that all the vldN-lane uses are VDUPLANEs and that the lane
   8768   // numbers match the load.
   8769   unsigned VLDLaneNo =
   8770     cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue();
   8771   for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
   8772        UI != UE; ++UI) {
   8773     // Ignore uses of the chain result.
   8774     if (UI.getUse().getResNo() == NumVecs)
   8775       continue;
   8776     SDNode *User = *UI;
   8777     if (User->getOpcode() != ARMISD::VDUPLANE ||
   8778         VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
   8779       return false;
   8780   }
   8781 
   8782   // Create the vldN-dup node.
   8783   EVT Tys[5];
   8784   unsigned n;
   8785   for (n = 0; n < NumVecs; ++n)
   8786     Tys[n] = VT;
   8787   Tys[n] = MVT::Other;
   8788   SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1);
   8789   SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
   8790   MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
   8791   SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys,
   8792                                            Ops, 2, VLDMemInt->getMemoryVT(),
   8793                                            VLDMemInt->getMemOperand());
   8794 
   8795   // Update the uses.
   8796   for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
   8797        UI != UE; ++UI) {
   8798     unsigned ResNo = UI.getUse().getResNo();
   8799     // Ignore uses of the chain result.
   8800     if (ResNo == NumVecs)
   8801       continue;
   8802     SDNode *User = *UI;
   8803     DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
   8804   }
   8805 
   8806   // Now the vldN-lane intrinsic is dead except for its chain result.
   8807   // Update uses of the chain.
   8808   std::vector<SDValue> VLDDupResults;
   8809   for (unsigned n = 0; n < NumVecs; ++n)
   8810     VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
   8811   VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
   8812   DCI.CombineTo(VLD, VLDDupResults);
   8813 
   8814   return true;
   8815 }
   8816 
   8817 /// PerformVDUPLANECombine - Target-specific dag combine xforms for
   8818 /// ARMISD::VDUPLANE.
   8819 static SDValue PerformVDUPLANECombine(SDNode *N,
   8820                                       TargetLowering::DAGCombinerInfo &DCI) {
   8821   SDValue Op = N->getOperand(0);
   8822 
   8823   // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses
   8824   // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation.
   8825   if (CombineVLDDUP(N, DCI))
   8826     return SDValue(N, 0);
   8827 
   8828   // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
   8829   // redundant.  Ignore bit_converts for now; element sizes are checked below.
   8830   while (Op.getOpcode() == ISD::BITCAST)
   8831     Op = Op.getOperand(0);
   8832   if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
   8833     return SDValue();
   8834 
   8835   // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
   8836   unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
   8837   // The canonical VMOV for a zero vector uses a 32-bit element size.
   8838   unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   8839   unsigned EltBits;
   8840   if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
   8841     EltSize = 8;
   8842   EVT VT = N->getValueType(0);
   8843   if (EltSize > VT.getVectorElementType().getSizeInBits())
   8844     return SDValue();
   8845 
   8846   return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
   8847 }
   8848 
   8849 // isConstVecPow2 - Return true if each vector element is a power of 2, all
   8850 // elements are the same constant, C, and Log2(C) ranges from 1 to 32.
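         // For example (hypothetical constants): <float 8.0, float 8.0> satisfies this
         // with C = 8, since both elements convert exactly to the same power of two
         // and Log2(8) = 3 lies in [1, 32].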
   8851 static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
   8852 {
   8853   integerPart cN;
   8854   integerPart c0 = 0;
   8855   for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
   8856        I != E; I++) {
   8857     ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
   8858     if (!C)
   8859       return false;
   8860 
   8861     bool isExact;
   8862     APFloat APF = C->getValueAPF();
   8863     if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
   8864         != APFloat::opOK || !isExact)
   8865       return false;
   8866 
   8867     c0 = (I == 0) ? cN : c0;
   8868     if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
   8869       return false;
   8870   }
   8871   C = c0;
   8872   return true;
   8873 }
   8874 
   8875 /// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
   8876 /// can replace combinations of VMUL and VCVT (floating-point to integer)
   8877 /// when the VMUL has a constant operand that is a power of 2.
   8878 ///
   8879 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
   8880 ///  vmul.f32        d16, d17, d16
   8881 ///  vcvt.s32.f32    d16, d16
   8882 /// becomes:
   8883 ///  vcvt.s32.f32    d16, d16, #3
   8884 static SDValue PerformVCVTCombine(SDNode *N,
   8885                                   TargetLowering::DAGCombinerInfo &DCI,
   8886                                   const ARMSubtarget *Subtarget) {
   8887   SelectionDAG &DAG = DCI.DAG;
   8888   SDValue Op = N->getOperand(0);
   8889 
   8890   if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
   8891       Op.getOpcode() != ISD::FMUL)
   8892     return SDValue();
   8893 
   8894   uint64_t C;
   8895   SDValue N0 = Op->getOperand(0);
   8896   SDValue ConstVec = Op->getOperand(1);
   8897   bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
   8898 
   8899   if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
   8900       !isConstVecPow2(ConstVec, isSigned, C))
   8901     return SDValue();
   8902 
   8903   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
   8904     Intrinsic::arm_neon_vcvtfp2fxu;
   8905   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
   8906                      N->getValueType(0),
   8907                      DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
   8908                      DAG.getConstant(Log2_64(C), MVT::i32));
   8909 }
   8910 
   8911 /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
   8912 /// can replace combinations of VCVT (integer to floating-point) and VDIV
   8913 /// when the VDIV has a constant operand that is a power of 2.
   8914 ///
   8915 /// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
   8916 ///  vcvt.f32.s32    d16, d16
   8917 ///  vdiv.f32        d16, d17, d16
   8918 /// becomes:
   8919 ///  vcvt.f32.s32    d16, d16, #3
   8920 static SDValue PerformVDIVCombine(SDNode *N,
   8921                                   TargetLowering::DAGCombinerInfo &DCI,
   8922                                   const ARMSubtarget *Subtarget) {
   8923   SelectionDAG &DAG = DCI.DAG;
   8924   SDValue Op = N->getOperand(0);
   8925   unsigned OpOpcode = Op.getNode()->getOpcode();
   8926 
   8927   if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
   8928       (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
   8929     return SDValue();
   8930 
   8931   uint64_t C;
   8932   SDValue ConstVec = N->getOperand(1);
   8933   bool isSigned = OpOpcode == ISD::SINT_TO_FP;
   8934 
   8935   if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
   8936       !isConstVecPow2(ConstVec, isSigned, C))
   8937     return SDValue();
   8938 
   8939   unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
   8940     Intrinsic::arm_neon_vcvtfxu2fp;
   8941   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
   8942                      Op.getValueType(),
   8943                      DAG.getConstant(IntrinsicOpcode, MVT::i32),
   8944                      Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
   8945 }
   8946 
    8947 /// getVShiftImm - Check if this is a valid build_vector for the immediate
   8948 /// operand of a vector shift operation, where all the elements of the
   8949 /// build_vector must have the same constant integer value.
   8950 static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
   8951   // Ignore bit_converts.
   8952   while (Op.getOpcode() == ISD::BITCAST)
   8953     Op = Op.getOperand(0);
   8954   BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
   8955   APInt SplatBits, SplatUndef;
   8956   unsigned SplatBitSize;
   8957   bool HasAnyUndefs;
    8958   if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
   8959                                       HasAnyUndefs, ElementBits) ||
   8960       SplatBitSize > ElementBits)
   8961     return false;
   8962   Cnt = SplatBits.getSExtValue();
   8963   return true;
   8964 }
   8965 
   8966 /// isVShiftLImm - Check if this is a valid build_vector for the immediate
   8967 /// operand of a vector shift left operation.  That value must be in the range:
   8968 ///   0 <= Value < ElementBits for a left shift; or
   8969 ///   0 <= Value <= ElementBits for a long left shift.
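         /// For example (a sketch): with v4i16 operands (ElementBits = 16), a splatted
         /// count of 15 is valid for an ordinary left shift, while a count of 16 is
         /// only valid for a long (vshll) shift.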
   8970 static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
   8971   assert(VT.isVector() && "vector shift count is not a vector type");
   8972   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
    8973   if (!getVShiftImm(Op, ElementBits, Cnt))
   8974     return false;
   8975   return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
   8976 }
   8977 
   8978 /// isVShiftRImm - Check if this is a valid build_vector for the immediate
    8979 /// operand of a vector shift right operation.  For a shift opcode, the count
    8980 /// is positive, but for an intrinsic the count must be negative. The
   8981 /// absolute value must be in the range:
   8982 ///   1 <= |Value| <= ElementBits for a right shift; or
   8983 ///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
   8984 static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
   8985                          int64_t &Cnt) {
   8986   assert(VT.isVector() && "vector shift count is not a vector type");
   8987   unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
    8988   if (!getVShiftImm(Op, ElementBits, Cnt))
   8989     return false;
   8990   if (isIntrinsic)
   8991     Cnt = -Cnt;
   8992   return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
   8993 }
   8994 
   8995 /// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
   8996 static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
   8997   unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
   8998   switch (IntNo) {
   8999   default:
   9000     // Don't do anything for most intrinsics.
   9001     break;
   9002 
   9003   // Vector shifts: check for immediate versions and lower them.
   9004   // Note: This is done during DAG combining instead of DAG legalizing because
   9005   // the build_vectors for 64-bit vector element shift counts are generally
   9006   // not legal, and it is hard to see their values after they get legalized to
   9007   // loads from a constant pool.
   9008   case Intrinsic::arm_neon_vshifts:
   9009   case Intrinsic::arm_neon_vshiftu:
   9010   case Intrinsic::arm_neon_vshiftls:
   9011   case Intrinsic::arm_neon_vshiftlu:
   9012   case Intrinsic::arm_neon_vshiftn:
   9013   case Intrinsic::arm_neon_vrshifts:
   9014   case Intrinsic::arm_neon_vrshiftu:
   9015   case Intrinsic::arm_neon_vrshiftn:
   9016   case Intrinsic::arm_neon_vqshifts:
   9017   case Intrinsic::arm_neon_vqshiftu:
   9018   case Intrinsic::arm_neon_vqshiftsu:
   9019   case Intrinsic::arm_neon_vqshiftns:
   9020   case Intrinsic::arm_neon_vqshiftnu:
   9021   case Intrinsic::arm_neon_vqshiftnsu:
   9022   case Intrinsic::arm_neon_vqrshiftns:
   9023   case Intrinsic::arm_neon_vqrshiftnu:
   9024   case Intrinsic::arm_neon_vqrshiftnsu: {
   9025     EVT VT = N->getOperand(1).getValueType();
   9026     int64_t Cnt;
   9027     unsigned VShiftOpc = 0;
   9028 
   9029     switch (IntNo) {
   9030     case Intrinsic::arm_neon_vshifts:
   9031     case Intrinsic::arm_neon_vshiftu:
   9032       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
   9033         VShiftOpc = ARMISD::VSHL;
   9034         break;
   9035       }
   9036       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
   9037         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
   9038                      ARMISD::VSHRs : ARMISD::VSHRu);
   9039         break;
   9040       }
   9041       return SDValue();
   9042 
   9043     case Intrinsic::arm_neon_vshiftls:
   9044     case Intrinsic::arm_neon_vshiftlu:
   9045       if (isVShiftLImm(N->getOperand(2), VT, true, Cnt))
   9046         break;
   9047       llvm_unreachable("invalid shift count for vshll intrinsic");
   9048 
   9049     case Intrinsic::arm_neon_vrshifts:
   9050     case Intrinsic::arm_neon_vrshiftu:
   9051       if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt))
   9052         break;
   9053       return SDValue();
   9054 
   9055     case Intrinsic::arm_neon_vqshifts:
   9056     case Intrinsic::arm_neon_vqshiftu:
   9057       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
   9058         break;
   9059       return SDValue();
   9060 
   9061     case Intrinsic::arm_neon_vqshiftsu:
   9062       if (isVShiftLImm(N->getOperand(2), VT, false, Cnt))
   9063         break;
   9064       llvm_unreachable("invalid shift count for vqshlu intrinsic");
   9065 
   9066     case Intrinsic::arm_neon_vshiftn:
   9067     case Intrinsic::arm_neon_vrshiftn:
   9068     case Intrinsic::arm_neon_vqshiftns:
   9069     case Intrinsic::arm_neon_vqshiftnu:
   9070     case Intrinsic::arm_neon_vqshiftnsu:
   9071     case Intrinsic::arm_neon_vqrshiftns:
   9072     case Intrinsic::arm_neon_vqrshiftnu:
   9073     case Intrinsic::arm_neon_vqrshiftnsu:
   9074       // Narrowing shifts require an immediate right shift.
   9075       if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt))
   9076         break;
   9077       llvm_unreachable("invalid shift count for narrowing vector shift "
   9078                        "intrinsic");
   9079 
   9080     default:
   9081       llvm_unreachable("unhandled vector shift");
   9082     }
   9083 
   9084     switch (IntNo) {
   9085     case Intrinsic::arm_neon_vshifts:
   9086     case Intrinsic::arm_neon_vshiftu:
   9087       // Opcode already set above.
   9088       break;
   9089     case Intrinsic::arm_neon_vshiftls:
   9090     case Intrinsic::arm_neon_vshiftlu:
   9091       if (Cnt == VT.getVectorElementType().getSizeInBits())
   9092         VShiftOpc = ARMISD::VSHLLi;
   9093       else
   9094         VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
   9095                      ARMISD::VSHLLs : ARMISD::VSHLLu);
   9096       break;
   9097     case Intrinsic::arm_neon_vshiftn:
   9098       VShiftOpc = ARMISD::VSHRN; break;
   9099     case Intrinsic::arm_neon_vrshifts:
   9100       VShiftOpc = ARMISD::VRSHRs; break;
   9101     case Intrinsic::arm_neon_vrshiftu:
   9102       VShiftOpc = ARMISD::VRSHRu; break;
   9103     case Intrinsic::arm_neon_vrshiftn:
   9104       VShiftOpc = ARMISD::VRSHRN; break;
   9105     case Intrinsic::arm_neon_vqshifts:
   9106       VShiftOpc = ARMISD::VQSHLs; break;
   9107     case Intrinsic::arm_neon_vqshiftu:
   9108       VShiftOpc = ARMISD::VQSHLu; break;
   9109     case Intrinsic::arm_neon_vqshiftsu:
   9110       VShiftOpc = ARMISD::VQSHLsu; break;
   9111     case Intrinsic::arm_neon_vqshiftns:
   9112       VShiftOpc = ARMISD::VQSHRNs; break;
   9113     case Intrinsic::arm_neon_vqshiftnu:
   9114       VShiftOpc = ARMISD::VQSHRNu; break;
   9115     case Intrinsic::arm_neon_vqshiftnsu:
   9116       VShiftOpc = ARMISD::VQSHRNsu; break;
   9117     case Intrinsic::arm_neon_vqrshiftns:
   9118       VShiftOpc = ARMISD::VQRSHRNs; break;
   9119     case Intrinsic::arm_neon_vqrshiftnu:
   9120       VShiftOpc = ARMISD::VQRSHRNu; break;
   9121     case Intrinsic::arm_neon_vqrshiftnsu:
   9122       VShiftOpc = ARMISD::VQRSHRNsu; break;
   9123     }
   9124 
   9125     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
   9126                        N->getOperand(1), DAG.getConstant(Cnt, MVT::i32));
   9127   }
   9128 
   9129   case Intrinsic::arm_neon_vshiftins: {
   9130     EVT VT = N->getOperand(1).getValueType();
   9131     int64_t Cnt;
   9132     unsigned VShiftOpc = 0;
   9133 
   9134     if (isVShiftLImm(N->getOperand(3), VT, false, Cnt))
   9135       VShiftOpc = ARMISD::VSLI;
   9136     else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt))
   9137       VShiftOpc = ARMISD::VSRI;
   9138     else {
   9139       llvm_unreachable("invalid shift count for vsli/vsri intrinsic");
   9140     }
   9141 
   9142     return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0),
   9143                        N->getOperand(1), N->getOperand(2),
   9144                        DAG.getConstant(Cnt, MVT::i32));
   9145   }
   9146 
   9147   case Intrinsic::arm_neon_vqrshifts:
   9148   case Intrinsic::arm_neon_vqrshiftu:
   9149     // No immediate versions of these to check for.
   9150     break;
   9151   }
   9152 
   9153   return SDValue();
   9154 }
   9155 
   9156 /// PerformShiftCombine - Checks for immediate versions of vector shifts and
   9157 /// lowers them.  As with the vector shift intrinsics, this is done during DAG
   9158 /// combining instead of DAG legalizing because the build_vectors for 64-bit
   9159 /// vector element shift counts are generally not legal, and it is hard to see
   9160 /// their values after they get legalized to loads from a constant pool.
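         /// For example (a sketch): (shl (v4i32 X), (splat 3)) is lowered here to
         /// ARMISD::VSHL with an immediate count, which selects to "vshl.i32 ..., #3".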
   9161 static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG,
   9162                                    const ARMSubtarget *ST) {
   9163   EVT VT = N->getValueType(0);
   9164   if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) {
   9165     // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high
   9166     // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16.
   9167     SDValue N1 = N->getOperand(1);
   9168     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
   9169       SDValue N0 = N->getOperand(0);
   9170       if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP &&
   9171           DAG.MaskedValueIsZero(N0.getOperand(0),
   9172                                 APInt::getHighBitsSet(32, 16)))
   9173         return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1);
   9174     }
   9175   }
   9176 
   9177   // Nothing to be done for scalar shifts.
   9178   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   9179   if (!VT.isVector() || !TLI.isTypeLegal(VT))
   9180     return SDValue();
   9181 
   9182   assert(ST->hasNEON() && "unexpected vector shift");
   9183   int64_t Cnt;
   9184 
   9185   switch (N->getOpcode()) {
   9186   default: llvm_unreachable("unexpected shift opcode");
   9187 
   9188   case ISD::SHL:
   9189     if (isVShiftLImm(N->getOperand(1), VT, false, Cnt))
   9190       return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0),
   9191                          DAG.getConstant(Cnt, MVT::i32));
   9192     break;
   9193 
   9194   case ISD::SRA:
   9195   case ISD::SRL:
   9196     if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) {
   9197       unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ?
   9198                             ARMISD::VSHRs : ARMISD::VSHRu);
   9199       return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0),
   9200                          DAG.getConstant(Cnt, MVT::i32));
   9201     }
   9202   }
   9203   return SDValue();
   9204 }
   9205 
   9206 /// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND,
   9207 /// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND.
   9208 static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
   9209                                     const ARMSubtarget *ST) {
   9210   SDValue N0 = N->getOperand(0);
   9211 
   9212   // Check for sign- and zero-extensions of vector extract operations of 8-
   9213   // and 16-bit vector elements.  NEON supports these directly.  They are
   9214   // handled during DAG combining because type legalization will promote them
   9215   // to 32-bit types and it is messy to recognize the operations after that.
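           // For example (a sketch): (sext (i32 (extractelt (v8i16 V), n))) becomes
           // ARMISD::VGETLANEs, which selects to a single sign-extending lane move
           // (roughly "vmov.s16 rD, dM[n]") instead of an extract plus sxth.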
   9216   if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
   9217     SDValue Vec = N0.getOperand(0);
   9218     SDValue Lane = N0.getOperand(1);
   9219     EVT VT = N->getValueType(0);
   9220     EVT EltVT = N0.getValueType();
   9221     const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   9222 
   9223     if (VT == MVT::i32 &&
   9224         (EltVT == MVT::i8 || EltVT == MVT::i16) &&
   9225         TLI.isTypeLegal(Vec.getValueType()) &&
   9226         isa<ConstantSDNode>(Lane)) {
   9227 
   9228       unsigned Opc = 0;
   9229       switch (N->getOpcode()) {
   9230       default: llvm_unreachable("unexpected opcode");
   9231       case ISD::SIGN_EXTEND:
   9232         Opc = ARMISD::VGETLANEs;
   9233         break;
   9234       case ISD::ZERO_EXTEND:
   9235       case ISD::ANY_EXTEND:
   9236         Opc = ARMISD::VGETLANEu;
   9237         break;
   9238       }
   9239       return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane);
   9240     }
   9241   }
   9242 
   9243   return SDValue();
   9244 }
   9245 
   9246 /// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
   9247 /// to match f32 max/min patterns to use NEON vmax/vmin instructions.
   9248 static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
   9249                                        const ARMSubtarget *ST) {
   9250   // If the target supports NEON, try to use vmax/vmin instructions for f32
   9251   // selects like "x < y ? x : y".  Unless the NoNaNsFPMath option is set,
   9252   // be careful about NaNs:  NEON's vmax/vmin return NaN if either operand is
   9253   // a NaN; only do the transformation when it matches that behavior.
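           // For example (a sketch): "x < y ? x : y" with x known not to be NaN maps
           // to ARMISD::FMIN (a single vmin.f32); the checks below guard the NaN and
           // signed-zero corner cases.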
   9254 
   9255   // For now only do this when using NEON for FP operations; if using VFP, it
   9256   // is not obvious that the benefit outweighs the cost of switching to the
   9257   // NEON pipeline.
   9258   if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
   9259       N->getValueType(0) != MVT::f32)
   9260     return SDValue();
   9261 
   9262   SDValue CondLHS = N->getOperand(0);
   9263   SDValue CondRHS = N->getOperand(1);
   9264   SDValue LHS = N->getOperand(2);
   9265   SDValue RHS = N->getOperand(3);
   9266   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
   9267 
   9268   unsigned Opcode = 0;
   9269   bool IsReversed;
   9270   if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
   9271     IsReversed = false; // x CC y ? x : y
   9272   } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
   9273     IsReversed = true ; // x CC y ? y : x
   9274   } else {
   9275     return SDValue();
   9276   }
   9277 
   9278   bool IsUnordered;
   9279   switch (CC) {
   9280   default: break;
   9281   case ISD::SETOLT:
   9282   case ISD::SETOLE:
   9283   case ISD::SETLT:
   9284   case ISD::SETLE:
   9285   case ISD::SETULT:
   9286   case ISD::SETULE:
   9287     // If LHS is NaN, an ordered comparison will be false and the result will
   9288     // be the RHS, but vmin(NaN, RHS) = NaN.  Avoid this by checking that LHS
   9289     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
   9290     IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
   9291     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
   9292       break;
   9293     // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
   9294     // will return -0, so vmin can only be used for unsafe math or if one of
   9295     // the operands is known to be nonzero.
   9296     if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
   9297         !DAG.getTarget().Options.UnsafeFPMath &&
   9298         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
   9299       break;
   9300     Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
   9301     break;
   9302 
   9303   case ISD::SETOGT:
   9304   case ISD::SETOGE:
   9305   case ISD::SETGT:
   9306   case ISD::SETGE:
   9307   case ISD::SETUGT:
   9308   case ISD::SETUGE:
   9309     // If LHS is NaN, an ordered comparison will be false and the result will
   9310     // be the RHS, but vmax(NaN, RHS) = NaN.  Avoid this by checking that LHS
   9311     // != NaN.  Likewise, for unordered comparisons, check for RHS != NaN.
   9312     IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
   9313     if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
   9314       break;
   9315     // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
   9316     // will return +0, so vmax can only be used for unsafe math or if one of
   9317     // the operands is known to be nonzero.
   9318     if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
   9319         !DAG.getTarget().Options.UnsafeFPMath &&
   9320         !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
   9321       break;
   9322     Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
   9323     break;
   9324   }
   9325 
   9326   if (!Opcode)
   9327     return SDValue();
   9328   return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
   9329 }
   9330 
   9331 /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
   9332 SDValue
   9333 ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
   9334   SDValue Cmp = N->getOperand(4);
   9335   if (Cmp.getOpcode() != ARMISD::CMPZ)
   9336     // Only looking at EQ and NE cases.
   9337     return SDValue();
   9338 
   9339   EVT VT = N->getValueType(0);
   9340   DebugLoc dl = N->getDebugLoc();
   9341   SDValue LHS = Cmp.getOperand(0);
   9342   SDValue RHS = Cmp.getOperand(1);
   9343   SDValue FalseVal = N->getOperand(0);
   9344   SDValue TrueVal = N->getOperand(1);
   9345   SDValue ARMcc = N->getOperand(2);
   9346   ARMCC::CondCodes CC =
   9347     (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
   9348 
   9349   // Simplify
   9350   //   mov     r1, r0
   9351   //   cmp     r1, x
   9352   //   mov     r0, y
   9353   //   moveq   r0, x
   9354   // to
   9355   //   cmp     r0, x
   9356   //   movne   r0, y
   9357   //
   9358   //   mov     r1, r0
   9359   //   cmp     r1, x
   9360   //   mov     r0, x
   9361   //   movne   r0, y
   9362   // to
   9363   //   cmp     r0, x
   9364   //   movne   r0, y
   9365   /// FIXME: Turn this into a target neutral optimization?
   9366   SDValue Res;
   9367   if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) {
   9368     Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
   9369                       N->getOperand(3), Cmp);
   9370   } else if (CC == ARMCC::EQ && TrueVal == RHS) {
   9371     SDValue ARMcc;
   9372     SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
   9373     Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
   9374                       N->getOperand(3), NewCmp);
   9375   }
   9376 
   9377   if (Res.getNode()) {
   9378     APInt KnownZero, KnownOne;
   9379     DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne);
   9380     // Capture demanded bits information that would be otherwise lost.
   9381     if (KnownZero == 0xfffffffe)
   9382       Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
   9383                         DAG.getValueType(MVT::i1));
   9384     else if (KnownZero == 0xffffff00)
   9385       Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
   9386                         DAG.getValueType(MVT::i8));
   9387     else if (KnownZero == 0xffff0000)
   9388       Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
   9389                         DAG.getValueType(MVT::i16));
   9390   }
   9391 
   9392   return Res;
   9393 }
   9394 
   9395 SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   9396                                              DAGCombinerInfo &DCI) const {
   9397   switch (N->getOpcode()) {
   9398   default: break;
   9399   case ISD::ADDC:       return PerformADDCCombine(N, DCI, Subtarget);
   9400   case ISD::ADD:        return PerformADDCombine(N, DCI, Subtarget);
   9401   case ISD::SUB:        return PerformSUBCombine(N, DCI);
   9402   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
   9403   case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
   9404   case ISD::XOR:        return PerformXORCombine(N, DCI, Subtarget);
   9405   case ISD::AND:        return PerformANDCombine(N, DCI, Subtarget);
   9406   case ARMISD::BFI:     return PerformBFICombine(N, DCI);
   9407   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
   9408   case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG);
   9409   case ISD::STORE:      return PerformSTORECombine(N, DCI);
   9410   case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI);
   9411   case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
   9412   case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
   9413   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   9414   case ISD::FP_TO_SINT:
   9415   case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
   9416   case ISD::FDIV:       return PerformVDIVCombine(N, DCI, Subtarget);
   9417   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
   9418   case ISD::SHL:
   9419   case ISD::SRA:
   9420   case ISD::SRL:        return PerformShiftCombine(N, DCI.DAG, Subtarget);
   9421   case ISD::SIGN_EXTEND:
   9422   case ISD::ZERO_EXTEND:
   9423   case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
   9424   case ISD::SELECT_CC:  return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
   9425   case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
   9426   case ARMISD::VLD2DUP:
   9427   case ARMISD::VLD3DUP:
   9428   case ARMISD::VLD4DUP:
   9429     return CombineBaseUpdate(N, DCI);
   9430   case ISD::INTRINSIC_VOID:
   9431   case ISD::INTRINSIC_W_CHAIN:
   9432     switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
   9433     case Intrinsic::arm_neon_vld1:
   9434     case Intrinsic::arm_neon_vld2:
   9435     case Intrinsic::arm_neon_vld3:
   9436     case Intrinsic::arm_neon_vld4:
   9437     case Intrinsic::arm_neon_vld2lane:
   9438     case Intrinsic::arm_neon_vld3lane:
   9439     case Intrinsic::arm_neon_vld4lane:
   9440     case Intrinsic::arm_neon_vst1:
   9441     case Intrinsic::arm_neon_vst2:
   9442     case Intrinsic::arm_neon_vst3:
   9443     case Intrinsic::arm_neon_vst4:
   9444     case Intrinsic::arm_neon_vst2lane:
   9445     case Intrinsic::arm_neon_vst3lane:
   9446     case Intrinsic::arm_neon_vst4lane:
   9447       return CombineBaseUpdate(N, DCI);
   9448     default: break;
   9449     }
   9450     break;
   9451   }
   9452   return SDValue();
   9453 }
   9454 
   9455 bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
   9456                                                           EVT VT) const {
   9457   return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
   9458 }
   9459 
   9460 bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
    9461   // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
   9462   bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
   9463 
   9464   switch (VT.getSimpleVT().SimpleTy) {
   9465   default:
   9466     return false;
   9467   case MVT::i8:
   9468   case MVT::i16:
   9469   case MVT::i32: {
    9470     // Unaligned access can use (for example) LDRB, LDRH, LDR
   9471     if (AllowsUnaligned) {
   9472       if (Fast)
   9473         *Fast = Subtarget->hasV7Ops();
   9474       return true;
   9475     }
   9476     return false;
   9477   }
   9478   case MVT::f64:
   9479   case MVT::v2f64: {
    9480     // For any little-endian target with NEON, we can support unaligned ld/st
    9481     // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
    9482     // A big-endian target may also explicitly support unaligned accesses.
   9483     if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
   9484       if (Fast)
   9485         *Fast = true;
   9486       return true;
   9487     }
   9488     return false;
   9489   }
   9490   }
   9491 }
   9492 
   9493 static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
   9494                        unsigned AlignCheck) {
   9495   return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
   9496           (DstAlign == 0 || DstAlign % AlignCheck == 0));
   9497 }
   9498 
   9499 EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
   9500                                            unsigned DstAlign, unsigned SrcAlign,
   9501                                            bool IsMemset, bool ZeroMemset,
   9502                                            bool MemcpyStrSrc,
   9503                                            MachineFunction &MF) const {
   9504   const Function *F = MF.getFunction();
   9505 
   9506   // See if we can use NEON instructions for this...
   9507   if ((!IsMemset || ZeroMemset) &&
   9508       Subtarget->hasNEON() &&
   9509       !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
   9510                                        Attribute::NoImplicitFloat)) {
   9511     bool Fast;
   9512     if (Size >= 16 &&
   9513         (memOpAlign(SrcAlign, DstAlign, 16) ||
   9514          (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
   9515       return MVT::v2f64;
   9516     } else if (Size >= 8 &&
   9517                (memOpAlign(SrcAlign, DstAlign, 8) ||
   9518                 (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
   9519       return MVT::f64;
   9520     }
   9521   }
   9522 
    9523   // Lower to i32/i16 if the size permits.
   9524   if (Size >= 4)
   9525     return MVT::i32;
   9526   else if (Size >= 2)
   9527     return MVT::i16;
   9528 
   9529   // Let the target-independent logic figure it out.
   9530   return MVT::Other;
   9531 }
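
         // For example, a 32-byte memcpy between 16-byte aligned buffers on a
         // NEON-capable target (in a function without the NoImplicitFloat
         // attribute) is widened to MVT::v2f64, i.e. 128-bit vld1/vst1 chunks;
         // an 8-byte copy gets MVT::f64, and smaller copies fall back to
         // i32/i16 or to the generic lowering.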
   9532 
   9533 bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
   9534   if (Val.getOpcode() != ISD::LOAD)
   9535     return false;
   9536 
   9537   EVT VT1 = Val.getValueType();
   9538   if (!VT1.isSimple() || !VT1.isInteger() ||
   9539       !VT2.isSimple() || !VT2.isInteger())
   9540     return false;
   9541 
   9542   switch (VT1.getSimpleVT().SimpleTy) {
   9543   default: break;
   9544   case MVT::i1:
   9545   case MVT::i8:
   9546   case MVT::i16:
    9547     // 8-bit and 16-bit loads implicitly zero-extend to 32 bits.
   9548     return true;
   9549   }
   9550 
   9551   return false;
   9552 }
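
         // E.g. zero-extending an i8 or i16 load to i32 is free because LDRB and
         // LDRH (and their Thumb counterparts) already zero-extend into the full
         // 32-bit register.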
   9553 
   9554 static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
   9555   if (V < 0)
   9556     return false;
   9557 
   9558   unsigned Scale = 1;
   9559   switch (VT.getSimpleVT().SimpleTy) {
   9560   default: return false;
   9561   case MVT::i1:
   9562   case MVT::i8:
   9563     // Scale == 1;
   9564     break;
   9565   case MVT::i16:
   9566     // Scale == 2;
   9567     Scale = 2;
   9568     break;
   9569   case MVT::i32:
   9570     // Scale == 4;
   9571     Scale = 4;
   9572     break;
   9573   }
   9574 
   9575   if ((V & (Scale - 1)) != 0)
   9576     return false;
   9577   V /= Scale;
   9578   return V == (V & ((1LL << 5) - 1));
   9579 }
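
         // This matches the Thumb1 5-bit scaled immediate offsets: 0-31 for byte
         // accesses, 0-62 (even) for halfwords, and 0-124 (multiples of 4) for
         // words, as encoded by LDRB/LDRH/LDR.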
   9580 
   9581 static bool isLegalT2AddressImmediate(int64_t V, EVT VT,
   9582                                       const ARMSubtarget *Subtarget) {
   9583   bool isNeg = false;
   9584   if (V < 0) {
   9585     isNeg = true;
   9586     V = - V;
   9587   }
   9588 
   9589   switch (VT.getSimpleVT().SimpleTy) {
   9590   default: return false;
   9591   case MVT::i1:
   9592   case MVT::i8:
   9593   case MVT::i16:
   9594   case MVT::i32:
   9595     // + imm12 or - imm8
   9596     if (isNeg)
   9597       return V == (V & ((1LL << 8) - 1));
   9598     return V == (V & ((1LL << 12) - 1));
   9599   case MVT::f32:
   9600   case MVT::f64:
   9601     // Same as ARM mode. FIXME: NEON?
   9602     if (!Subtarget->hasVFP2())
   9603       return false;
   9604     if ((V & 3) != 0)
   9605       return false;
   9606     V >>= 2;
   9607     return V == (V & ((1LL << 8) - 1));
   9608   }
   9609 }
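
         // In other words, Thumb2 integer loads/stores accept offsets of 0..4095
         // (+imm12) or -255..0 (-imm8), while VFP loads/stores accept multiples
         // of 4 up to +/-1020, matching the word-scaled 8-bit offset of
         // VLDR/VSTR.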
   9610 
   9611 /// isLegalAddressImmediate - Return true if the integer value can be used
   9612 /// as the offset of the target addressing mode for load / store of the
   9613 /// given type.
   9614 static bool isLegalAddressImmediate(int64_t V, EVT VT,
   9615                                     const ARMSubtarget *Subtarget) {
   9616   if (V == 0)
   9617     return true;
   9618 
   9619   if (!VT.isSimple())
   9620     return false;
   9621 
   9622   if (Subtarget->isThumb1Only())
   9623     return isLegalT1AddressImmediate(V, VT);
   9624   else if (Subtarget->isThumb2())
   9625     return isLegalT2AddressImmediate(V, VT, Subtarget);
   9626 
   9627   // ARM mode.
   9628   if (V < 0)
   9629     V = - V;
   9630   switch (VT.getSimpleVT().SimpleTy) {
   9631   default: return false;
   9632   case MVT::i1:
   9633   case MVT::i8:
   9634   case MVT::i32:
   9635     // +- imm12
   9636     return V == (V & ((1LL << 12) - 1));
   9637   case MVT::i16:
   9638     // +- imm8
   9639     return V == (V & ((1LL << 8) - 1));
   9640   case MVT::f32:
   9641   case MVT::f64:
   9642     if (!Subtarget->hasVFP2()) // FIXME: NEON?
   9643       return false;
   9644     if ((V & 3) != 0)
   9645       return false;
   9646     V >>= 2;
   9647     return V == (V & ((1LL << 8) - 1));
   9648   }
   9649 }
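
         // In ARM mode this corresponds to +/-4095 for byte/word accesses
         // (addressing mode 2), +/-255 for halfword accesses (addressing mode 3),
         // and multiples of 4 up to +/-1020 for VFP loads/stores (addressing
         // mode 5).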
   9650 
   9651 bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM,
   9652                                                       EVT VT) const {
   9653   int Scale = AM.Scale;
   9654   if (Scale < 0)
   9655     return false;
   9656 
   9657   switch (VT.getSimpleVT().SimpleTy) {
   9658   default: return false;
   9659   case MVT::i1:
   9660   case MVT::i8:
   9661   case MVT::i16:
   9662   case MVT::i32:
   9663     if (Scale == 1)
   9664       return true;
   9665     // r + r << imm
   9666     Scale = Scale & ~1;
   9667     return Scale == 2 || Scale == 4 || Scale == 8;
   9668   case MVT::i64:
   9669     // r + r
   9670     if (((unsigned)AM.HasBaseReg + Scale) <= 2)
   9671       return true;
   9672     return false;
   9673   case MVT::isVoid:
   9674     // Note, we allow "void" uses (basically, uses that aren't loads or
    9675     // stores), because ARM allows folding a scale into many arithmetic
   9676     // operations.  This should be made more precise and revisited later.
   9677 
   9678     // Allow r << imm, but the imm has to be a multiple of two.
   9679     if (Scale & 1) return false;
   9680     return isPowerOf2_32(Scale);
   9681   }
   9682 }
   9683 
   9684 /// isLegalAddressingMode - Return true if the addressing mode represented
   9685 /// by AM is legal for this target, for a load/store of the specified type.
   9686 bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
   9687                                               Type *Ty) const {
   9688   EVT VT = getValueType(Ty, true);
   9689   if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget))
   9690     return false;
   9691 
   9692   // Can never fold addr of global into load/store.
   9693   if (AM.BaseGV)
   9694     return false;
   9695 
   9696   switch (AM.Scale) {
   9697   case 0:  // no scale reg, must be "r+i" or "r", or "i".
   9698     break;
   9699   case 1:
   9700     if (Subtarget->isThumb1Only())
   9701       return false;
   9702     // FALL THROUGH.
   9703   default:
   9704     // ARM doesn't support any R+R*scale+imm addr modes.
   9705     if (AM.BaseOffs)
   9706       return false;
   9707 
   9708     if (!VT.isSimple())
   9709       return false;
   9710 
   9711     if (Subtarget->isThumb2())
   9712       return isLegalT2ScaledAddressingMode(AM, VT);
   9713 
   9714     int Scale = AM.Scale;
   9715     switch (VT.getSimpleVT().SimpleTy) {
   9716     default: return false;
   9717     case MVT::i1:
   9718     case MVT::i8:
   9719     case MVT::i32:
   9720       if (Scale < 0) Scale = -Scale;
   9721       if (Scale == 1)
   9722         return true;
   9723       // r + r << imm
   9724       return isPowerOf2_32(Scale & ~1);
   9725     case MVT::i16:
   9726     case MVT::i64:
   9727       // r + r
   9728       if (((unsigned)AM.HasBaseReg + Scale) <= 2)
   9729         return true;
   9730       return false;
   9731 
   9732     case MVT::isVoid:
   9733       // Note, we allow "void" uses (basically, uses that aren't loads or
    9734       // stores), because ARM allows folding a scale into many arithmetic
   9735       // operations.  This should be made more precise and revisited later.
   9736 
   9737       // Allow r << imm, but the imm has to be a multiple of two.
   9738       if (Scale & 1) return false;
   9739       return isPowerOf2_32(Scale);
   9740     }
   9741   }
   9742   return true;
   9743 }
   9744 
   9745 /// isLegalICmpImmediate - Return true if the specified immediate is legal
   9746 /// icmp immediate, that is the target has icmp instructions which can compare
   9747 /// a register against the immediate without having to materialize the
   9748 /// immediate into a register.
   9749 bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
   9750   // Thumb2 and ARM modes can use cmn for negative immediates.
   9751   if (!Subtarget->isThumb())
   9752     return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1;
   9753   if (Subtarget->isThumb2())
   9754     return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1;
    9755   // Thumb1 doesn't have cmn, and has only 8-bit immediates.
   9756   return Imm >= 0 && Imm <= 255;
   9757 }
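
         // For example, in ARM mode "cmp r0, #256" is legal because 256 is a
         // rotated 8-bit immediate, while a compare against 257 is not and would
         // require materializing the constant first; negative immediates are
         // accepted here because they can be compared with CMN instead.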
   9758 
   9759 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
   9760 /// *or sub* immediate, that is the target has add or sub instructions which can
   9761 /// add a register with the immediate without having to materialize the
   9762 /// immediate into a register.
   9763 bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const {
   9764   // Same encoding for add/sub, just flip the sign.
   9765   int64_t AbsImm = llvm::abs64(Imm);
   9766   if (!Subtarget->isThumb())
   9767     return ARM_AM::getSOImmVal(AbsImm) != -1;
   9768   if (Subtarget->isThumb2())
   9769     return ARM_AM::getT2SOImmVal(AbsImm) != -1;
    9770   // Thumb1 only has 8-bit unsigned immediates.
   9771   return AbsImm >= 0 && AbsImm <= 255;
   9772 }
   9773 
   9774 static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
   9775                                       bool isSEXTLoad, SDValue &Base,
   9776                                       SDValue &Offset, bool &isInc,
   9777                                       SelectionDAG &DAG) {
   9778   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
   9779     return false;
   9780 
   9781   if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) {
   9782     // AddressingMode 3
   9783     Base = Ptr->getOperand(0);
   9784     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
   9785       int RHSC = (int)RHS->getZExtValue();
   9786       if (RHSC < 0 && RHSC > -256) {
   9787         assert(Ptr->getOpcode() == ISD::ADD);
   9788         isInc = false;
   9789         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
   9790         return true;
   9791       }
   9792     }
   9793     isInc = (Ptr->getOpcode() == ISD::ADD);
   9794     Offset = Ptr->getOperand(1);
   9795     return true;
   9796   } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) {
   9797     // AddressingMode 2
   9798     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
   9799       int RHSC = (int)RHS->getZExtValue();
   9800       if (RHSC < 0 && RHSC > -0x1000) {
   9801         assert(Ptr->getOpcode() == ISD::ADD);
   9802         isInc = false;
   9803         Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
   9804         Base = Ptr->getOperand(0);
   9805         return true;
   9806       }
   9807     }
   9808 
   9809     if (Ptr->getOpcode() == ISD::ADD) {
   9810       isInc = true;
    9811       ARM_AM::ShiftOpc ShOpcVal =
   9812         ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode());
   9813       if (ShOpcVal != ARM_AM::no_shift) {
   9814         Base = Ptr->getOperand(1);
   9815         Offset = Ptr->getOperand(0);
   9816       } else {
   9817         Base = Ptr->getOperand(0);
   9818         Offset = Ptr->getOperand(1);
   9819       }
   9820       return true;
   9821     }
   9822 
   9823     isInc = (Ptr->getOpcode() == ISD::ADD);
   9824     Base = Ptr->getOperand(0);
   9825     Offset = Ptr->getOperand(1);
   9826     return true;
   9827   }
   9828 
   9829   // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
   9830   return false;
   9831 }
   9832 
   9833 static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT,
   9834                                      bool isSEXTLoad, SDValue &Base,
   9835                                      SDValue &Offset, bool &isInc,
   9836                                      SelectionDAG &DAG) {
   9837   if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB)
   9838     return false;
   9839 
   9840   Base = Ptr->getOperand(0);
   9841   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) {
   9842     int RHSC = (int)RHS->getZExtValue();
   9843     if (RHSC < 0 && RHSC > -0x100) { // 8 bits.
   9844       assert(Ptr->getOpcode() == ISD::ADD);
   9845       isInc = false;
   9846       Offset = DAG.getConstant(-RHSC, RHS->getValueType(0));
   9847       return true;
   9848     } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero.
   9849       isInc = Ptr->getOpcode() == ISD::ADD;
   9850       Offset = DAG.getConstant(RHSC, RHS->getValueType(0));
   9851       return true;
   9852     }
   9853   }
   9854 
   9855   return false;
   9856 }
   9857 
   9858 /// getPreIndexedAddressParts - returns true by value, base pointer and
   9859 /// offset pointer and addressing mode by reference if the node's address
    9860 /// can be legally represented as a pre-indexed load / store address.
   9861 bool
   9862 ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
   9863                                              SDValue &Offset,
   9864                                              ISD::MemIndexedMode &AM,
   9865                                              SelectionDAG &DAG) const {
   9866   if (Subtarget->isThumb1Only())
   9867     return false;
   9868 
   9869   EVT VT;
   9870   SDValue Ptr;
   9871   bool isSEXTLoad = false;
   9872   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
   9873     Ptr = LD->getBasePtr();
   9874     VT  = LD->getMemoryVT();
   9875     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
   9876   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
   9877     Ptr = ST->getBasePtr();
   9878     VT  = ST->getMemoryVT();
   9879   } else
   9880     return false;
   9881 
   9882   bool isInc;
   9883   bool isLegal = false;
   9884   if (Subtarget->isThumb2())
   9885     isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
   9886                                        Offset, isInc, DAG);
   9887   else
   9888     isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base,
   9889                                         Offset, isInc, DAG);
   9890   if (!isLegal)
   9891     return false;
   9892 
   9893   AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC;
   9894   return true;
   9895 }
   9896 
   9897 /// getPostIndexedAddressParts - returns true by value, base pointer and
   9898 /// offset pointer and addressing mode by reference if this node can be
   9899 /// combined with a load / store to form a post-indexed load / store.
   9900 bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
   9901                                                    SDValue &Base,
   9902                                                    SDValue &Offset,
   9903                                                    ISD::MemIndexedMode &AM,
   9904                                                    SelectionDAG &DAG) const {
   9905   if (Subtarget->isThumb1Only())
   9906     return false;
   9907 
   9908   EVT VT;
   9909   SDValue Ptr;
   9910   bool isSEXTLoad = false;
   9911   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
   9912     VT  = LD->getMemoryVT();
   9913     Ptr = LD->getBasePtr();
   9914     isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
   9915   } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
   9916     VT  = ST->getMemoryVT();
   9917     Ptr = ST->getBasePtr();
   9918   } else
   9919     return false;
   9920 
   9921   bool isInc;
   9922   bool isLegal = false;
   9923   if (Subtarget->isThumb2())
   9924     isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
   9925                                        isInc, DAG);
   9926   else
   9927     isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
   9928                                         isInc, DAG);
   9929   if (!isLegal)
   9930     return false;
   9931 
   9932   if (Ptr != Base) {
    9933     // Swap base ptr and offset to catch more post-indexed loads / stores
    9934     // when it's legal. In Thumb2 mode, the offset must be an immediate.
   9935     if (Ptr == Offset && Op->getOpcode() == ISD::ADD &&
   9936         !Subtarget->isThumb2())
   9937       std::swap(Base, Offset);
   9938 
    9939     // Post-indexed loads / stores update the base pointer.
   9940     if (Ptr != Base)
   9941       return false;
   9942   }
   9943 
   9944   AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
   9945   return true;
   9946 }
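
         // A successful match lets the pointer update be folded into the memory
         // operation itself, e.g. "ldr r0, [r1], #4" loads from r1 and then
         // advances r1 by 4 in a single instruction.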
   9947 
   9948 void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
   9949                                                        APInt &KnownZero,
   9950                                                        APInt &KnownOne,
   9951                                                        const SelectionDAG &DAG,
   9952                                                        unsigned Depth) const {
   9953   KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0);
   9954   switch (Op.getOpcode()) {
   9955   default: break;
   9956   case ARMISD::CMOV: {
   9957     // Bits are known zero/one if known on the LHS and RHS.
   9958     DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
   9959     if (KnownZero == 0 && KnownOne == 0) return;
   9960 
   9961     APInt KnownZeroRHS, KnownOneRHS;
   9962     DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1);
   9963     KnownZero &= KnownZeroRHS;
   9964     KnownOne  &= KnownOneRHS;
   9965     return;
   9966   }
   9967   }
   9968 }
   9969 
   9970 //===----------------------------------------------------------------------===//
   9971 //                           ARM Inline Assembly Support
   9972 //===----------------------------------------------------------------------===//
   9973 
   9974 bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const {
   9975   // Looking for "rev" which is V6+.
   9976   if (!Subtarget->hasV6Ops())
   9977     return false;
   9978 
   9979   InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue());
   9980   std::string AsmStr = IA->getAsmString();
   9981   SmallVector<StringRef, 4> AsmPieces;
   9982   SplitString(AsmStr, AsmPieces, ";\n");
   9983 
   9984   switch (AsmPieces.size()) {
   9985   default: return false;
   9986   case 1:
   9987     AsmStr = AsmPieces[0];
   9988     AsmPieces.clear();
   9989     SplitString(AsmStr, AsmPieces, " \t,");
   9990 
   9991     // rev $0, $1
   9992     if (AsmPieces.size() == 3 &&
   9993         AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" &&
   9994         IA->getConstraintString().compare(0, 4, "=l,l") == 0) {
   9995       IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
   9996       if (Ty && Ty->getBitWidth() == 32)
   9997         return IntrinsicLowering::LowerToByteSwap(CI);
   9998     }
   9999     break;
   10000   }
   10001 
   10002   return false;
   10003 }
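
         // For example, the single-instruction inline asm "rev $0, $1" with
         // constraint string "=l,l" on an i32 value is rewritten into a call to
         // llvm.bswap.i32, which still selects to REV but is now visible to the
         // optimizer.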
   10004 
   10005 /// getConstraintType - Given a constraint letter, return the type of
   10006 /// constraint it is for this target.
   10007 ARMTargetLowering::ConstraintType
   10008 ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
   10009   if (Constraint.size() == 1) {
   10010     switch (Constraint[0]) {
   10011     default:  break;
   10012     case 'l': return C_RegisterClass;
   10013     case 'w': return C_RegisterClass;
   10014     case 'h': return C_RegisterClass;
   10015     case 'x': return C_RegisterClass;
   10016     case 't': return C_RegisterClass;
   10017     case 'j': return C_Other; // Constant for movw.
   10018       // An address with a single base register. Due to the way we
   10019       // currently handle addresses it is the same as an 'r' memory constraint.
   10020     case 'Q': return C_Memory;
   10021     }
   10022   } else if (Constraint.size() == 2) {
   10023     switch (Constraint[0]) {
   10024     default: break;
   10025     // All 'U+' constraints are addresses.
   10026     case 'U': return C_Memory;
   10027     }
   10028   }
   10029   return TargetLowering::getConstraintType(Constraint);
   10030 }
   10031 
   10032 /// Examine constraint type and operand type and determine a weight value.
   10033 /// This object must already have been set up with the operand type
   10034 /// and the current alternative constraint selected.
   10035 TargetLowering::ConstraintWeight
   10036 ARMTargetLowering::getSingleConstraintMatchWeight(
   10037     AsmOperandInfo &info, const char *constraint) const {
   10038   ConstraintWeight weight = CW_Invalid;
   10039   Value *CallOperandVal = info.CallOperandVal;
   10040     // If we don't have a value, we can't do a match,
   10041   // If we don't have a value, we can't do a match,
   10042   // but allow it at the lowest weight.
   10043     return CW_Default;
   10044   Type *type = CallOperandVal->getType();
   10045   // Look at the constraint type.
   10046   switch (*constraint) {
   10047   default:
   10048     weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
   10049     break;
   10050   case 'l':
   10051     if (type->isIntegerTy()) {
   10052       if (Subtarget->isThumb())
   10053         weight = CW_SpecificReg;
   10054       else
   10055         weight = CW_Register;
   10056     }
   10057     break;
   10058   case 'w':
   10059     if (type->isFloatingPointTy())
   10060       weight = CW_Register;
   10061     break;
   10062   }
   10063   return weight;
   10064 }
   10065 
   10066 typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
   10067 RCPair
   10068 ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
   10069                                                 EVT VT) const {
   10070   if (Constraint.size() == 1) {
   10071     // GCC ARM Constraint Letters
   10072     switch (Constraint[0]) {
   10073     case 'l': // Low regs or general regs.
   10074       if (Subtarget->isThumb())
   10075         return RCPair(0U, &ARM::tGPRRegClass);
   10076       return RCPair(0U, &ARM::GPRRegClass);
   10077     case 'h': // High regs or no regs.
   10078       if (Subtarget->isThumb())
   10079         return RCPair(0U, &ARM::hGPRRegClass);
   10080       break;
   10081     case 'r':
   10082       return RCPair(0U, &ARM::GPRRegClass);
   10083     case 'w':
   10084       if (VT == MVT::f32)
   10085         return RCPair(0U, &ARM::SPRRegClass);
   10086       if (VT.getSizeInBits() == 64)
   10087         return RCPair(0U, &ARM::DPRRegClass);
   10088       if (VT.getSizeInBits() == 128)
   10089         return RCPair(0U, &ARM::QPRRegClass);
   10090       break;
   10091     case 'x':
   10092       if (VT == MVT::f32)
   10093         return RCPair(0U, &ARM::SPR_8RegClass);
   10094       if (VT.getSizeInBits() == 64)
   10095         return RCPair(0U, &ARM::DPR_8RegClass);
   10096       if (VT.getSizeInBits() == 128)
   10097         return RCPair(0U, &ARM::QPR_8RegClass);
   10098       break;
   10099     case 't':
   10100       if (VT == MVT::f32)
   10101         return RCPair(0U, &ARM::SPRRegClass);
   10102       break;
   10103     }
   10104   }
   10105   if (StringRef("{cc}").equals_lower(Constraint))
   10106     return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass);
   10107 
   10108   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
   10109 }
   10110 
   10111 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
   10112 /// vector.  If it is invalid, don't add anything to Ops.
   10113 void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   10114                                                      std::string &Constraint,
   10115                                                      std::vector<SDValue>&Ops,
   10116                                                      SelectionDAG &DAG) const {
   10117   SDValue Result(0, 0);
   10118 
   10119   // Currently only support length 1 constraints.
   10120   if (Constraint.length() != 1) return;
   10121 
   10122   char ConstraintLetter = Constraint[0];
   10123   switch (ConstraintLetter) {
   10124   default: break;
   10125   case 'j':
   10126   case 'I': case 'J': case 'K': case 'L':
   10127   case 'M': case 'N': case 'O':
   10128     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
   10129     if (!C)
   10130       return;
   10131 
   10132     int64_t CVal64 = C->getSExtValue();
   10133     int CVal = (int) CVal64;
   10134     // None of these constraints allow values larger than 32 bits.  Check
   10135     // that the value fits in an int.
   10136     if (CVal != CVal64)
   10137       return;
   10138 
   10139     switch (ConstraintLetter) {
   10140       case 'j':
   10141         // Constant suitable for movw, must be between 0 and
   10142         // 65535.
   10143         if (Subtarget->hasV6T2Ops())
   10144           if (CVal >= 0 && CVal <= 65535)
   10145             break;
   10146         return;
   10147       case 'I':
   10148         if (Subtarget->isThumb1Only()) {
   10149           // This must be a constant between 0 and 255, for ADD
   10150           // immediates.
   10151           if (CVal >= 0 && CVal <= 255)
   10152             break;
   10153         } else if (Subtarget->isThumb2()) {
   10154           // A constant that can be used as an immediate value in a
   10155           // data-processing instruction.
   10156           if (ARM_AM::getT2SOImmVal(CVal) != -1)
   10157             break;
   10158         } else {
   10159           // A constant that can be used as an immediate value in a
   10160           // data-processing instruction.
   10161           if (ARM_AM::getSOImmVal(CVal) != -1)
   10162             break;
   10163         }
   10164         return;
   10165 
   10166       case 'J':
   10167         if (Subtarget->isThumb()) {  // FIXME thumb2
   10168           // This must be a constant between -255 and -1, for negated ADD
   10169           // immediates. This can be used in GCC with an "n" modifier that
   10170           // prints the negated value, for use with SUB instructions. It is
   10171           // not useful otherwise but is implemented for compatibility.
   10172           if (CVal >= -255 && CVal <= -1)
   10173             break;
   10174         } else {
   10175           // This must be a constant between -4095 and 4095. It is not clear
   10176           // what this constraint is intended for. Implemented for
   10177           // compatibility with GCC.
   10178           if (CVal >= -4095 && CVal <= 4095)
   10179             break;
   10180         }
   10181         return;
   10182 
   10183       case 'K':
   10184         if (Subtarget->isThumb1Only()) {
   10185           // A 32-bit value where only one byte has a nonzero value. Exclude
   10186           // zero to match GCC. This constraint is used by GCC internally for
   10187           // constants that can be loaded with a move/shift combination.
   10188           // It is not useful otherwise but is implemented for compatibility.
   10189           if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal))
   10190             break;
   10191         } else if (Subtarget->isThumb2()) {
   10192           // A constant whose bitwise inverse can be used as an immediate
   10193           // value in a data-processing instruction. This can be used in GCC
   10194           // with a "B" modifier that prints the inverted value, for use with
   10195           // BIC and MVN instructions. It is not useful otherwise but is
   10196           // implemented for compatibility.
   10197           if (ARM_AM::getT2SOImmVal(~CVal) != -1)
   10198             break;
   10199         } else {
   10200           // A constant whose bitwise inverse can be used as an immediate
   10201           // value in a data-processing instruction. This can be used in GCC
   10202           // with a "B" modifier that prints the inverted value, for use with
   10203           // BIC and MVN instructions. It is not useful otherwise but is
   10204           // implemented for compatibility.
   10205           if (ARM_AM::getSOImmVal(~CVal) != -1)
   10206             break;
   10207         }
   10208         return;
   10209 
   10210       case 'L':
   10211         if (Subtarget->isThumb1Only()) {
   10212           // This must be a constant between -7 and 7,
   10213           // for 3-operand ADD/SUB immediate instructions.
   10214           if (CVal >= -7 && CVal < 7)
   10215             break;
   10216         } else if (Subtarget->isThumb2()) {
   10217           // A constant whose negation can be used as an immediate value in a
   10218           // data-processing instruction. This can be used in GCC with an "n"
   10219           // modifier that prints the negated value, for use with SUB
   10220           // instructions. It is not useful otherwise but is implemented for
   10221           // compatibility.
   10222           if (ARM_AM::getT2SOImmVal(-CVal) != -1)
   10223             break;
   10224         } else {
   10225           // A constant whose negation can be used as an immediate value in a
   10226           // data-processing instruction. This can be used in GCC with an "n"
   10227           // modifier that prints the negated value, for use with SUB
   10228           // instructions. It is not useful otherwise but is implemented for
   10229           // compatibility.
   10230           if (ARM_AM::getSOImmVal(-CVal) != -1)
   10231             break;
   10232         }
   10233         return;
   10234 
   10235       case 'M':
   10236         if (Subtarget->isThumb()) { // FIXME thumb2
   10237           // This must be a multiple of 4 between 0 and 1020, for
   10238           // ADD sp + immediate.
   10239           if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
   10240             break;
   10241         } else {
   10242           // A power of two or a constant between 0 and 32.  This is used in
   10243           // GCC for the shift amount on shifted register operands, but it is
   10244           // useful in general for any shift amounts.
   10245           if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
   10246             break;
   10247         }
   10248         return;
   10249 
   10250       case 'N':
   10251         if (Subtarget->isThumb()) {  // FIXME thumb2
   10252           // This must be a constant between 0 and 31, for shift amounts.
   10253           if (CVal >= 0 && CVal <= 31)
   10254             break;
   10255         }
   10256         return;
   10257 
   10258       case 'O':
   10259         if (Subtarget->isThumb()) {  // FIXME thumb2
   10260           // This must be a multiple of 4 between -508 and 508, for
   10261           // ADD/SUB sp = sp + immediate.
   10262           if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
   10263             break;
   10264         }
   10265         return;
   10266     }
   10267     Result = DAG.getTargetConstant(CVal, Op.getValueType());
   10268     break;
   10269   }
   10270 
   10271   if (Result.getNode()) {
   10272     Ops.push_back(Result);
   10273     return;
   10274   }
   10275   return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
   10276 }
   10277 
   10278 bool
   10279 ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
   10280   // The ARM target isn't yet aware of offsets.
   10281   return false;
   10282 }
   10283 
   10284 bool ARM::isBitFieldInvertedMask(unsigned v) {
   10285   if (v == 0xffffffff)
   10286     return false;
   10287   // There can be 1's on either or both "outsides"; all the "inside"
   10288   // bits must be 0's.
   10289   unsigned int lsb = 0, msb = 31;
   10290   while (v & (1 << msb)) --msb;
   10291   while (v & (1 << lsb)) ++lsb;
   10292   for (unsigned int i = lsb; i <= msb; ++i) {
   10293     if (v & (1 << i))
   10294       return false;
   10295   }
   10296   return true;
   10297 }
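
         // For example, 0xF000000F is an inverted mask (its zero bits form one
         // contiguous field, bits 4-27, which BFC/BFI can address), whereas
         // 0x0000FF00 is not because its set bits sit in the middle of the word.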
   10298 
   10299 /// isFPImmLegal - Returns true if the target can instruction select the
   10300 /// specified FP immediate natively. If false, the legalizer will
   10301 /// materialize the FP immediate as a load from a constant pool.
   10302 bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
   10303   if (!Subtarget->hasVFP3())
   10304     return false;
   10305   if (VT == MVT::f32)
   10306     return ARM_AM::getFP32Imm(Imm) != -1;
   10307   if (VT == MVT::f64)
   10308     return ARM_AM::getFP64Imm(Imm) != -1;
   10309   return false;
   10310 }
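
         // With VFPv3, constants such as 1.0, 0.5 or -2.0 fit the 8-bit VMOV
         // floating-point immediate encoding and can be materialized directly,
         // whereas a value like 0.1 cannot and is loaded from the constant pool
         // instead.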
   10311 
   10312 /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
   10313 /// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
   10314 /// specified in the intrinsic calls.
   10315 bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   10316                                            const CallInst &I,
   10317                                            unsigned Intrinsic) const {
   10318   switch (Intrinsic) {
   10319   case Intrinsic::arm_neon_vld1:
   10320   case Intrinsic::arm_neon_vld2:
   10321   case Intrinsic::arm_neon_vld3:
   10322   case Intrinsic::arm_neon_vld4:
   10323   case Intrinsic::arm_neon_vld2lane:
   10324   case Intrinsic::arm_neon_vld3lane:
   10325   case Intrinsic::arm_neon_vld4lane: {
   10326     Info.opc = ISD::INTRINSIC_W_CHAIN;
   10327     // Conservatively set memVT to the entire set of vectors loaded.
   10328     uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8;
   10329     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
   10330     Info.ptrVal = I.getArgOperand(0);
   10331     Info.offset = 0;
   10332     Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
   10333     Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
   10334     Info.vol = false; // volatile loads with NEON intrinsics not supported
   10335     Info.readMem = true;
   10336     Info.writeMem = false;
   10337     return true;
   10338   }
   10339   case Intrinsic::arm_neon_vst1:
   10340   case Intrinsic::arm_neon_vst2:
   10341   case Intrinsic::arm_neon_vst3:
   10342   case Intrinsic::arm_neon_vst4:
   10343   case Intrinsic::arm_neon_vst2lane:
   10344   case Intrinsic::arm_neon_vst3lane:
   10345   case Intrinsic::arm_neon_vst4lane: {
   10346     Info.opc = ISD::INTRINSIC_VOID;
   10347     // Conservatively set memVT to the entire set of vectors stored.
   10348     unsigned NumElts = 0;
   10349     for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
   10350       Type *ArgTy = I.getArgOperand(ArgI)->getType();
   10351       if (!ArgTy->isVectorTy())
   10352         break;
   10353       NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8;
   10354     }
   10355     Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
   10356     Info.ptrVal = I.getArgOperand(0);
   10357     Info.offset = 0;
   10358     Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
   10359     Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
   10360     Info.vol = false; // volatile stores with NEON intrinsics not supported
   10361     Info.readMem = false;
   10362     Info.writeMem = true;
   10363     return true;
   10364   }
   10365   case Intrinsic::arm_strexd: {
   10366     Info.opc = ISD::INTRINSIC_W_CHAIN;
   10367     Info.memVT = MVT::i64;
   10368     Info.ptrVal = I.getArgOperand(2);
   10369     Info.offset = 0;
   10370     Info.align = 8;
   10371     Info.vol = true;
   10372     Info.readMem = false;
   10373     Info.writeMem = true;
   10374     return true;
   10375   }
   10376   case Intrinsic::arm_ldrexd: {
   10377     Info.opc = ISD::INTRINSIC_W_CHAIN;
   10378     Info.memVT = MVT::i64;
   10379     Info.ptrVal = I.getArgOperand(0);
   10380     Info.offset = 0;
   10381     Info.align = 8;
   10382     Info.vol = true;
   10383     Info.readMem = true;
   10384     Info.writeMem = false;
   10385     return true;
   10386   }
   10387   default:
   10388     break;
   10389   }
   10390 
   10391   return false;
   10392 }
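
         // Note that memVT is deliberately conservative: a vld4 of four 128-bit
         // Q registers, for instance, is modeled as a single 64-byte (v8i64)
         // access covering everything the instruction may touch.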
   10393