      1 //===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the interfaces that ARM uses to lower LLVM code into a
     11 // selection DAG.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "arm-isel"
     16 #include "ARMISelLowering.h"
     17 #include "ARM.h"
     18 #include "ARMCallingConv.h"
     19 #include "ARMConstantPoolValue.h"
     20 #include "ARMMachineFunctionInfo.h"
     21 #include "ARMPerfectShuffle.h"
     22 #include "ARMSubtarget.h"
     23 #include "ARMTargetMachine.h"
     24 #include "ARMTargetObjectFile.h"
     25 #include "MCTargetDesc/ARMAddressingModes.h"
     26 #include "llvm/ADT/Statistic.h"
     27 #include "llvm/ADT/StringExtras.h"
     28 #include "llvm/CodeGen/CallingConvLower.h"
     29 #include "llvm/CodeGen/IntrinsicLowering.h"
     30 #include "llvm/CodeGen/MachineBasicBlock.h"
     31 #include "llvm/CodeGen/MachineFrameInfo.h"
     32 #include "llvm/CodeGen/MachineFunction.h"
     33 #include "llvm/CodeGen/MachineInstrBuilder.h"
     34 #include "llvm/CodeGen/MachineModuleInfo.h"
     35 #include "llvm/CodeGen/MachineRegisterInfo.h"
     36 #include "llvm/CodeGen/SelectionDAG.h"
     37 #include "llvm/IR/CallingConv.h"
     38 #include "llvm/IR/Constants.h"
     39 #include "llvm/IR/Function.h"
     40 #include "llvm/IR/GlobalValue.h"
     41 #include "llvm/IR/Instruction.h"
     42 #include "llvm/IR/Instructions.h"
     43 #include "llvm/IR/Intrinsics.h"
     44 #include "llvm/IR/Type.h"
     45 #include "llvm/MC/MCSectionMachO.h"
     46 #include "llvm/Support/CommandLine.h"
     47 #include "llvm/Support/ErrorHandling.h"
     48 #include "llvm/Support/MathExtras.h"
     49 #include "llvm/Support/raw_ostream.h"
     50 #include "llvm/Target/TargetOptions.h"
     51 using namespace llvm;
     52 
     53 STATISTIC(NumTailCalls, "Number of tail calls");
     54 STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
     55 STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
     56 
     57 // This option should go away when tail calls fully work.
     58 static cl::opt<bool>
     59 EnableARMTailCalls("arm-tail-calls", cl::Hidden,
     60   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
     61   cl::init(false));
     62 
     63 cl::opt<bool>
     64 EnableARMLongCalls("arm-long-calls", cl::Hidden,
     65   cl::desc("Generate calls via indirect call instructions"),
     66   cl::init(false));
     67 
     68 static cl::opt<bool>
     69 ARMInterworking("arm-interworking", cl::Hidden,
     70   cl::desc("Enable / disable ARM interworking (for debugging only)"),
     71   cl::init(true));
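         // Note: these cl::Hidden flags do not appear in --help but can still be
         // set on the llc command line (or via -mllvm from the driver), e.g.
         //   llc -arm-tail-calls -arm-long-calls=0 foo.ll
         // They are intended for debugging and bring-up rather than general use.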
     72 
     73 namespace {
     74   class ARMCCState : public CCState {
     75   public:
     76     ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
     77                const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
     78                LLVMContext &C, ParmContext PC)
     79         : CCState(CC, isVarArg, MF, TM, locs, C) {
     80       assert(((PC == Call) || (PC == Prologue)) &&
      81              "ARMCCState users must specify whether their context is call "
      82              "or prologue generation.");
     83       CallOrPrologue = PC;
     84     }
     85   };
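           // Lowering code in this file typically constructs ARMCCState with Call
           // when assigning outgoing call arguments and with Prologue when
           // assigning incoming formal arguments.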
     86 }
     87 
     88 // The APCS parameter registers.
     89 static const uint16_t GPRArgRegs[] = {
     90   ARM::R0, ARM::R1, ARM::R2, ARM::R3
     91 };
     92 
     93 void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
     94                                        MVT PromotedBitwiseVT) {
     95   if (VT != PromotedLdStVT) {
     96     setOperationAction(ISD::LOAD, VT, Promote);
     97     AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
     98 
     99     setOperationAction(ISD::STORE, VT, Promote);
    100     AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
    101   }
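             // Roughly speaking: when addDRTypeForNEON passes f64 as PromotedLdStVT,
             // a v8i8 load or store is promoted to an f64 load/store (a single
             // VLDR/VSTR) and the value is bitcast back to the vector type.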
    102 
    103   MVT ElemTy = VT.getVectorElementType();
    104   if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    105     setOperationAction(ISD::SETCC, VT, Custom);
    106   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    107   setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    108   if (ElemTy == MVT::i32) {
    109     setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    110     setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    111     setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    112     setOperationAction(ISD::FP_TO_UINT, VT, Custom);
    113   } else {
    114     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    115     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    116     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    117     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    118   }
    119   setOperationAction(ISD::BUILD_VECTOR,      VT, Custom);
    120   setOperationAction(ISD::VECTOR_SHUFFLE,    VT, Custom);
    121   setOperationAction(ISD::CONCAT_VECTORS,    VT, Legal);
    122   setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
    123   setOperationAction(ISD::SELECT,            VT, Expand);
    124   setOperationAction(ISD::SELECT_CC,         VT, Expand);
    125   setOperationAction(ISD::VSELECT,           VT, Expand);
    126   setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
    127   if (VT.isInteger()) {
    128     setOperationAction(ISD::SHL, VT, Custom);
    129     setOperationAction(ISD::SRA, VT, Custom);
    130     setOperationAction(ISD::SRL, VT, Custom);
    131   }
    132 
    133   // Promote all bit-wise operations.
    134   if (VT.isInteger() && VT != PromotedBitwiseVT) {
    135     setOperationAction(ISD::AND, VT, Promote);
    136     AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    137     setOperationAction(ISD::OR,  VT, Promote);
    138     AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    139     setOperationAction(ISD::XOR, VT, Promote);
    140     AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
    141   }
    142 
    143   // Neon does not support vector divide/remainder operations.
    144   setOperationAction(ISD::SDIV, VT, Expand);
    145   setOperationAction(ISD::UDIV, VT, Expand);
    146   setOperationAction(ISD::FDIV, VT, Expand);
    147   setOperationAction(ISD::SREM, VT, Expand);
    148   setOperationAction(ISD::UREM, VT, Expand);
    149   setOperationAction(ISD::FREM, VT, Expand);
    150 }
    151 
    152 void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
    153   addRegisterClass(VT, &ARM::DPRRegClass);
    154   addTypeForNEON(VT, MVT::f64, MVT::v2i32);
    155 }
    156 
    157 void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
    158   addRegisterClass(VT, &ARM::QPRRegClass);
    159   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
    160 }
    161 
    162 static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
    163   if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    164     return new TargetLoweringObjectFileMachO();
    165 
    166   return new ARMElfTargetObjectFile();
    167 }
    168 
    169 ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    170     : TargetLowering(TM, createTLOF(TM)) {
    171   Subtarget = &TM.getSubtarget<ARMSubtarget>();
    172   RegInfo = TM.getRegisterInfo();
    173   Itins = TM.getInstrItineraryData();
    174 
    175   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
    176 
    177   if (Subtarget->isTargetDarwin()) {
    178     // Uses VFP for Thumb libfuncs if available.
    179     if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
    180       // Single-precision floating-point arithmetic.
    181       setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
    182       setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
    183       setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
    184       setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");
    185 
    186       // Double-precision floating-point arithmetic.
    187       setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
    188       setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
    189       setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
    190       setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");
    191 
    192       // Single-precision comparisons.
    193       setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
    194       setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
    195       setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
    196       setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
    197       setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
    198       setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
    199       setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
    200       setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");
    201 
    202       setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    203       setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
    204       setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    205       setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    206       setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    207       setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    208       setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    209       setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    210 
    211       // Double-precision comparisons.
    212       setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
    213       setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
    214       setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
    215       setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
    216       setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
    217       setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
    218       setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
    219       setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");
    220 
    221       setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    222       setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
    223       setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    224       setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    225       setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    226       setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    227       setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    228       setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    229 
    230       // Floating-point to integer conversions.
    231       // i64 conversions are done via library routines even when generating VFP
    232       // instructions, so use the same ones.
    233       setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
    234       setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
    235       setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
    236       setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");
    237 
    238       // Conversions between floating types.
    239       setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
    240       setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");
    241 
    242       // Integer to floating-point conversions.
    243       // i64 conversions are done via library routines even when generating VFP
    244       // instructions, so use the same ones.
    245       // FIXME: There appears to be some naming inconsistency in ARM libgcc:
    246       // e.g., __floatunsidf vs. __floatunssidfvfp.
    247       setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
    248       setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
    249       setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
    250       setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    251     }
    252   }
    253 
     254   // These libcalls are not available on 32-bit targets.
    255   setLibcallName(RTLIB::SHL_I128, 0);
    256   setLibcallName(RTLIB::SRL_I128, 0);
    257   setLibcallName(RTLIB::SRA_I128, 0);
    258 
    259   if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    260     // Double-precision floating-point arithmetic helper functions
    261     // RTABI chapter 4.1.2, Table 2
    262     setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    263     setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    264     setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    265     setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    266     setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    267     setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    268     setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    269     setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);
    270 
    271     // Double-precision floating-point comparison helper functions
    272     // RTABI chapter 4.1.2, Table 3
    273     setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    274     setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    275     setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    276     setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    277     setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    278     setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    279     setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    280     setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    281     setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    282     setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    283     setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    284     setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    285     setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    286     setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    287     setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    288     setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    289     setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    290     setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    291     setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    292     setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    293     setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    294     setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    295     setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    296     setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);
    297 
    298     // Single-precision floating-point arithmetic helper functions
    299     // RTABI chapter 4.1.2, Table 4
    300     setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    301     setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    302     setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    303     setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    304     setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    305     setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    306     setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    307     setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);
    308 
    309     // Single-precision floating-point comparison helper functions
    310     // RTABI chapter 4.1.2, Table 5
    311     setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    312     setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    313     setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
    314     setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ);
    315     setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt");
    316     setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
    317     setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple");
    318     setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
    319     setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge");
    320     setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
    321     setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt");
    322     setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
    323     setLibcallName(RTLIB::UO_F32,  "__aeabi_fcmpun");
    324     setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
    325     setLibcallName(RTLIB::O_F32,   "__aeabi_fcmpun");
    326     setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);
    327     setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS);
    328     setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS);
    329     setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS);
    330     setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS);
    331     setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS);
    332     setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS);
    333     setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS);
    334     setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS);
    335 
    336     // Floating-point to integer conversions.
    337     // RTABI chapter 4.1.2, Table 6
    338     setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    339     setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    340     setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    341     setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    342     setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    343     setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    344     setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    345     setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    346     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    347     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    348     setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    349     setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    350     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    351     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    352     setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    353     setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);
    354 
    355     // Conversions between floating types.
    356     // RTABI chapter 4.1.2, Table 7
    357     setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    358     setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    359     setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    360     setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);
    361 
    362     // Integer to floating-point conversions.
    363     // RTABI chapter 4.1.2, Table 8
    364     setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    365     setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    366     setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    367     setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    368     setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    369     setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    370     setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    371     setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    372     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    373     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    374     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    375     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    376     setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    377     setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    378     setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    379     setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    380 
    381     // Long long helper functions
    382     // RTABI chapter 4.2, Table 9
    383     setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    384     setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    385     setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    386     setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    387     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    388     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    389     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    390     setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    391     setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    392     setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);
    393 
    394     // Integer division functions
    395     // RTABI chapter 4.3.1
    396     setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    397     setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    398     setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    399     setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    400     setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    401     setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    402     setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    403     setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    404     setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    405     setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    406     setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    407     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    408     setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    409     setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    410     setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    411     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    412 
    413     // Memory operations
    414     // RTABI chapter 4.3.4
    415     setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    416     setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    417     setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    418     setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    419     setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    420     setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
    421   }
    422 
    423   // Use divmod compiler-rt calls for iOS 5.0 and later.
    424   if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
    425       !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    426     setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    427     setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
    428   }
    429 
    430   if (Subtarget->isThumb1Only())
    431     addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
    432   else
    433     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
    434   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    435       !Subtarget->isThumb1Only()) {
    436     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    437     if (!Subtarget->isFPOnlySP())
    438       addRegisterClass(MVT::f64, &ARM::DPRRegClass);
    439 
    440     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    441   }
    442 
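           // By default, mark every truncating vector store and extending vector
           // load as Expand; the cases the target can actually do directly (e.g.
           // the NEON extloads from v4i8 enabled below) are switched back to Legal
           // afterwards.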
    443   for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    444        VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    445     for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
    446          InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
    447       setTruncStoreAction((MVT::SimpleValueType)VT,
    448                           (MVT::SimpleValueType)InnerVT, Expand);
    449     setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    450     setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    451     setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
    452   }
    453 
    454   setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
    455 
    456   if (Subtarget->hasNEON()) {
    457     addDRTypeForNEON(MVT::v2f32);
    458     addDRTypeForNEON(MVT::v8i8);
    459     addDRTypeForNEON(MVT::v4i16);
    460     addDRTypeForNEON(MVT::v2i32);
    461     addDRTypeForNEON(MVT::v1i64);
    462 
    463     addQRTypeForNEON(MVT::v4f32);
    464     addQRTypeForNEON(MVT::v2f64);
    465     addQRTypeForNEON(MVT::v16i8);
    466     addQRTypeForNEON(MVT::v8i16);
    467     addQRTypeForNEON(MVT::v4i32);
    468     addQRTypeForNEON(MVT::v2i64);
    469 
    470     // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
     471     // neither Neon nor VFP supports any arithmetic operations on it.
     472     // The same applies to v4f32, except that vadd, vsub, and vmul are
     473     // natively supported for v4f32.
    474     setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    475     setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    476     setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    477     // FIXME: Code duplication: FDIV and FREM are expanded always, see
    478     // ARMTargetLowering::addTypeForNEON method for details.
    479     setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    480     setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    481     // FIXME: Create unittest.
     482     // In other words, find a case where "copysign" appears in the DAG with
     483     // vector operands.
    484     setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    485     // FIXME: Code duplication: SETCC has custom operation action, see
    486     // ARMTargetLowering::addTypeForNEON method for details.
    487     setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    488     // FIXME: Create unittest for FNEG and for FABS.
    489     setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    490     setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    491     setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    492     setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    493     setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    494     setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    495     setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    496     setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    497     setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    498     setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    499     setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    500     setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    501     // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    502     setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    503     setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    504     setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    505     setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    506     setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    507     setOperationAction(ISD::FMA, MVT::v2f64, Expand);
    508 
    509     setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    510     setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    511     setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    512     setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    513     setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    514     setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    515     setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    516     setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    517     setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    518     setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    519     setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    520     setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    521     setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    522     setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    523     setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
    524 
     525     // Likewise expand the FP intrinsics for v2f32.
    526     setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    527     setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    528     setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    529     setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    530     setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    531     setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    532     setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    533     setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    534     setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    535     setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    536     setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    537     setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    538     setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    539     setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    540     setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
    541 
    542     // Neon does not support some operations on v1i64 and v2i64 types.
    543     setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    544     // Custom handling for some quad-vector types to detect VMULL.
    545     setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    546     setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    547     setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    548     // Custom handling for some vector types to avoid expensive expansions
    549     setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    550     setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    551     setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    552     setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    553     setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    554     setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
     555     // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
     556     // a destination type that is wider than the source, nor does it have
     557     // an FP_TO_[SU]INT instruction with a destination narrower than the
     558     // source.
    559     setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    560     setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    561     setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    562     setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
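             // The custom lowering goes through v4i32: e.g. a v4i16 sint_to_fp is
             // widened with vmovl and then converted with the v4i32 <-> v4f32 vcvt.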
    563 
    564     setOperationAction(ISD::FP_ROUND,   MVT::v2f32, Expand);
    565     setOperationAction(ISD::FP_EXTEND,  MVT::v2f64, Expand);
    566 
     567     // NEON does not have a single-instruction CTPOP for vectors with element
     568     // types wider than 8 bits.  However, custom lowering can leverage the
    569     // v8i8/v16i8 vcnt instruction.
    570     setOperationAction(ISD::CTPOP,      MVT::v2i32, Custom);
    571     setOperationAction(ISD::CTPOP,      MVT::v4i32, Custom);
    572     setOperationAction(ISD::CTPOP,      MVT::v4i16, Custom);
    573     setOperationAction(ISD::CTPOP,      MVT::v8i16, Custom);
    574 
    575     // NEON only has FMA instructions as of VFP4.
    576     if (!Subtarget->hasVFP4()) {
    577       setOperationAction(ISD::FMA, MVT::v2f32, Expand);
    578       setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    579     }
    580 
    581     setTargetDAGCombine(ISD::INTRINSIC_VOID);
    582     setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    583     setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    584     setTargetDAGCombine(ISD::SHL);
    585     setTargetDAGCombine(ISD::SRL);
    586     setTargetDAGCombine(ISD::SRA);
    587     setTargetDAGCombine(ISD::SIGN_EXTEND);
    588     setTargetDAGCombine(ISD::ZERO_EXTEND);
    589     setTargetDAGCombine(ISD::ANY_EXTEND);
    590     setTargetDAGCombine(ISD::SELECT_CC);
    591     setTargetDAGCombine(ISD::BUILD_VECTOR);
    592     setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    593     setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    594     setTargetDAGCombine(ISD::STORE);
    595     setTargetDAGCombine(ISD::FP_TO_SINT);
    596     setTargetDAGCombine(ISD::FP_TO_UINT);
    597     setTargetDAGCombine(ISD::FDIV);
    598 
    599     // It is legal to extload from v4i8 to v4i16 or v4i32.
    600     MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
    601                   MVT::v4i16, MVT::v2i16,
    602                   MVT::v2i32};
    603     for (unsigned i = 0; i < 6; ++i) {
    604       setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
    605       setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
    606       setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    607     }
    608   }
    609 
    610   // ARM and Thumb2 support UMLAL/SMLAL.
    611   if (!Subtarget->isThumb1Only())
    612     setTargetDAGCombine(ISD::ADDC);
    613 
    614 
    615   computeRegisterProperties();
    616 
    617   // ARM does not have f32 extending load.
    618   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
    619 
    620   // ARM does not have i1 sign extending load.
    621   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
    622 
    623   // ARM supports all 4 flavors of integer indexed load / store.
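           // That is, pre- and post-indexed forms in both increment and decrement
           // variants, e.g. "ldr r0, [r1, #4]!" (pre-indexed) and
           // "ldr r0, [r1], #4" (post-indexed).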
    624   if (!Subtarget->isThumb1Only()) {
    625     for (unsigned im = (unsigned)ISD::PRE_INC;
    626          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
    627       setIndexedLoadAction(im,  MVT::i1,  Legal);
    628       setIndexedLoadAction(im,  MVT::i8,  Legal);
    629       setIndexedLoadAction(im,  MVT::i16, Legal);
    630       setIndexedLoadAction(im,  MVT::i32, Legal);
    631       setIndexedStoreAction(im, MVT::i1,  Legal);
    632       setIndexedStoreAction(im, MVT::i8,  Legal);
    633       setIndexedStoreAction(im, MVT::i16, Legal);
    634       setIndexedStoreAction(im, MVT::i32, Legal);
    635     }
    636   }
    637 
    638   // i64 operation support.
    639   setOperationAction(ISD::MUL,     MVT::i64, Expand);
    640   setOperationAction(ISD::MULHU,   MVT::i32, Expand);
    641   if (Subtarget->isThumb1Only()) {
    642     setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    643     setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
    644   }
    645   if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
    646       || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    647     setOperationAction(ISD::MULHS, MVT::i32, Expand);
    648 
    649   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    650   setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    651   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
    652   setOperationAction(ISD::SRL,       MVT::i64, Custom);
    653   setOperationAction(ISD::SRA,       MVT::i64, Custom);
    654 
    655   if (!Subtarget->isThumb1Only()) {
    656     // FIXME: We should do this for Thumb1 as well.
    657     setOperationAction(ISD::ADDC,    MVT::i32, Custom);
    658     setOperationAction(ISD::ADDE,    MVT::i32, Custom);
    659     setOperationAction(ISD::SUBC,    MVT::i32, Custom);
    660     setOperationAction(ISD::SUBE,    MVT::i32, Custom);
    661   }
    662 
    663   // ARM does not have ROTL.
    664   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
    665   setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
    666   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    667   if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    668     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
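           // (CLZ first appeared in ARMv5T and has no Thumb1 encoding, so older
           // cores and Thumb1 fall back to the generic expansion.)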
    669 
    670   // These just redirect to CTTZ and CTLZ on ARM.
    671   setOperationAction(ISD::CTTZ_ZERO_UNDEF  , MVT::i32  , Expand);
    672   setOperationAction(ISD::CTLZ_ZERO_UNDEF  , MVT::i32  , Expand);
    673 
     674   // Only ARMv6 and later have BSWAP (the REV instruction).
    675   if (!Subtarget->hasV6Ops())
    676     setOperationAction(ISD::BSWAP, MVT::i32, Expand);
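           // Without REV, a 32-bit byte swap is synthesized from shifts, ANDs and
           // ORs by the generic expansion.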
    677 
    678   if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
    679       !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    680     // These are expanded into libcalls if the cpu doesn't have HW divider.
     681     // These are expanded into libcalls if the CPU doesn't have a hardware divider.
    682     setOperationAction(ISD::UDIV,  MVT::i32, Expand);
    683   }
    684   setOperationAction(ISD::SREM,  MVT::i32, Expand);
    685   setOperationAction(ISD::UREM,  MVT::i32, Expand);
    686   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    687   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    688 
    689   setOperationAction(ISD::GlobalAddress, MVT::i32,   Custom);
    690   setOperationAction(ISD::ConstantPool,  MVT::i32,   Custom);
    691   setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
    692   setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
    693   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
    694 
    695   setOperationAction(ISD::TRAP, MVT::Other, Legal);
    696 
    697   // Use the default implementation.
    698   setOperationAction(ISD::VASTART,            MVT::Other, Custom);
    699   setOperationAction(ISD::VAARG,              MVT::Other, Expand);
    700   setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
    701   setOperationAction(ISD::VAEND,              MVT::Other, Expand);
    702   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
    703   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
    704 
    705   if (!Subtarget->isTargetDarwin()) {
    706     // Non-Darwin platforms may return values in these registers via the
    707     // personality function.
    708     setOperationAction(ISD::EHSELECTION,      MVT::i32,   Expand);
    709     setOperationAction(ISD::EXCEPTIONADDR,    MVT::i32,   Expand);
    710     setExceptionPointerRegister(ARM::R0);
    711     setExceptionSelectorRegister(ARM::R1);
    712   }
    713 
    714   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
    715   // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
    716   // the default expansion.
    717   // FIXME: This should be checking for v6k, not just v6.
    718   if (Subtarget->hasDataBarrier() ||
    719       (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    720     // membarrier needs custom lowering; the rest are legal and handled
    721     // normally.
    722     setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
    723     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    724     // Custom lowering for 64-bit ops
    725     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Custom);
    726     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Custom);
    727     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Custom);
    728     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Custom);
    729     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Custom);
    730     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Custom);
    731     setOperationAction(ISD::ATOMIC_LOAD_MIN,  MVT::i64, Custom);
    732     setOperationAction(ISD::ATOMIC_LOAD_MAX,  MVT::i64, Custom);
    733     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
    734     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
    735     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Custom);
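             // These 64-bit atomics are eventually lowered to ldrexd/strexd based
             // loops by the custom inserter later in this file.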
     736     // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    737     setInsertFencesForAtomic(true);
    738   } else {
    739     // Set them all for expansion, which will force libcalls.
    740     setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    741     setOperationAction(ISD::ATOMIC_FENCE,   MVT::Other, Expand);
    742     setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    743     setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    744     setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    745     setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    746     setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    747     setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    748     setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    749     setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    750     setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    751     setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    752     setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    753     setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    754     // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    755     // Unordered/Monotonic case.
    756     setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    757     setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
    758     // Since the libcalls include locking, fold in the fences
    759     setShouldFoldAtomicFences(true);
    760   }
    761 
    762   setOperationAction(ISD::PREFETCH,         MVT::Other, Custom);
    763 
    764   // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
    765   if (!Subtarget->hasV6Ops()) {
    766     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    767     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
    768   }
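           // Without SXTB/SXTH, SIGN_EXTEND_INREG on i8/i16 is expanded to a
           // shift-left / arithmetic-shift-right pair.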
    769   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
    770 
    771   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    772       !Subtarget->isThumb1Only()) {
     773     // Turn f64 -> i64 into VMOVRRD and i64 -> f64 into VMOVDRR,
     774     // iff the target supports VFP2.
    775     setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    776     setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
    777   }
    778 
    779   // We want to custom lower some of our intrinsics.
    780   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
    781   if (Subtarget->isTargetDarwin()) {
    782     setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    783     setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    784     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
    785   }
    786 
    787   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
    788   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
    789   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
    790   setOperationAction(ISD::SELECT,    MVT::i32, Custom);
    791   setOperationAction(ISD::SELECT,    MVT::f32, Custom);
    792   setOperationAction(ISD::SELECT,    MVT::f64, Custom);
    793   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
    794   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    795   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
    796 
    797   setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
    798   setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
    799   setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
    800   setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
    801   setOperationAction(ISD::BR_JT,     MVT::Other, Custom);
    802 
    803   // We don't support sin/cos/fmod/copysign/pow
    804   setOperationAction(ISD::FSIN,      MVT::f64, Expand);
    805   setOperationAction(ISD::FSIN,      MVT::f32, Expand);
    806   setOperationAction(ISD::FCOS,      MVT::f32, Expand);
    807   setOperationAction(ISD::FCOS,      MVT::f64, Expand);
    808   setOperationAction(ISD::FSINCOS,   MVT::f64, Expand);
    809   setOperationAction(ISD::FSINCOS,   MVT::f32, Expand);
    810   setOperationAction(ISD::FREM,      MVT::f64, Expand);
    811   setOperationAction(ISD::FREM,      MVT::f32, Expand);
    812   if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
    813       !Subtarget->isThumb1Only()) {
    814     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    815     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
    816   }
    817   setOperationAction(ISD::FPOW,      MVT::f64, Expand);
    818   setOperationAction(ISD::FPOW,      MVT::f32, Expand);
    819 
    820   if (!Subtarget->hasVFP4()) {
    821     setOperationAction(ISD::FMA, MVT::f64, Expand);
    822     setOperationAction(ISD::FMA, MVT::f32, Expand);
    823   }
    824 
    825   // Various VFP goodness
    826   if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    827     // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    828     if (Subtarget->hasVFP2()) {
    829       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    830       setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    831       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    832       setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    833     }
    834     // Special handling for half-precision FP.
    835     if (!Subtarget->hasFP16()) {
    836       setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
    837       setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    838     }
    839   }
    840 
    841   // We have target-specific dag combine patterns for the following nodes:
    842   // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
    843   setTargetDAGCombine(ISD::ADD);
    844   setTargetDAGCombine(ISD::SUB);
    845   setTargetDAGCombine(ISD::MUL);
    846   setTargetDAGCombine(ISD::AND);
    847   setTargetDAGCombine(ISD::OR);
    848   setTargetDAGCombine(ISD::XOR);
    849 
    850   if (Subtarget->hasV6Ops())
    851     setTargetDAGCombine(ISD::SRL);
    852 
    853   setStackPointerRegisterToSaveRestore(ARM::SP);
    854 
    855   if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() ||
    856       !Subtarget->hasVFP2())
    857     setSchedulingPreference(Sched::RegPressure);
    858   else
    859     setSchedulingPreference(Sched::Hybrid);
    860 
    861   //// temporary - rewrite interface to use type
    862   MaxStoresPerMemset = 8;
    863   MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4;
    864   MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
    865   MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
    866   MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
    867   MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2;
    868 
    869   // On ARM arguments smaller than 4 bytes are extended, so all arguments
    870   // are at least 4 bytes aligned.
    871   setMinStackArgumentAlignment(4);
    872 
    873   BenefitFromCodePlacementOpt = true;
    874 
    875   // Prefer likely predicted branches to selects on out-of-order cores.
    876   PredictableSelectIsExpensive = Subtarget->isLikeA9();
    877 
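           // The argument is log2(bytes): Thumb functions end up 2-byte aligned and
           // ARM functions 4-byte aligned, matching the minimum instruction sizes.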
    878   setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
    879 }
    880 
    881 // FIXME: It might make sense to define the representative register class as the
    882 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
     883 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
    884 // SPR's representative would be DPR_VFP2. This should work well if register
    885 // pressure tracking were modified such that a register use would increment the
     886 // pressure of the register class's representative and all of its super
    887 // classes' representatives transitively. We have not implemented this because
    888 // of the difficulty prior to coalescing of modeling operand register classes
    889 // due to the common occurrence of cross class copies and subregister insertions
    890 // and extractions.
    891 std::pair<const TargetRegisterClass*, uint8_t>
    892 ARMTargetLowering::findRepresentativeClass(MVT VT) const{
    893   const TargetRegisterClass *RRC = 0;
    894   uint8_t Cost = 1;
    895   switch (VT.SimpleTy) {
    896   default:
    897     return TargetLowering::findRepresentativeClass(VT);
     898   // Use DPR as the representative register class for all floating-point
     899   // and vector types. Since there are 32 SPR registers and 32 DPR registers,
     900   // the cost is 1 for both f32 and f64.
    901   case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
    902   case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    903     RRC = &ARM::DPRRegClass;
    904     // When NEON is used for SP, only half of the register file is available
    905     // because operations that define both SP and DP results will be constrained
    906     // to the VFP2 class (D0-D15). We currently model this constraint prior to
    907     // coalescing by double-counting the SP regs. See the FIXME above.
    908     if (Subtarget->useNEONForSinglePrecisionFP())
    909       Cost = 2;
    910     break;
    911   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
    912   case MVT::v4f32: case MVT::v2f64:
    913     RRC = &ARM::DPRRegClass;
    914     Cost = 2;
    915     break;
    916   case MVT::v4i64:
    917     RRC = &ARM::DPRRegClass;
    918     Cost = 4;
    919     break;
    920   case MVT::v8i64:
    921     RRC = &ARM::DPRRegClass;
    922     Cost = 8;
    923     break;
    924   }
    925   return std::make_pair(RRC, Cost);
    926 }
    927 
    928 const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
    929   switch (Opcode) {
    930   default: return 0;
    931   case ARMISD::Wrapper:       return "ARMISD::Wrapper";
    932   case ARMISD::WrapperDYN:    return "ARMISD::WrapperDYN";
    933   case ARMISD::WrapperPIC:    return "ARMISD::WrapperPIC";
    934   case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
    935   case ARMISD::CALL:          return "ARMISD::CALL";
    936   case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
    937   case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
    938   case ARMISD::tCALL:         return "ARMISD::tCALL";
    939   case ARMISD::BRCOND:        return "ARMISD::BRCOND";
    940   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
    941   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
    942   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
    943   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
    944   case ARMISD::CMP:           return "ARMISD::CMP";
    945   case ARMISD::CMN:           return "ARMISD::CMN";
    946   case ARMISD::CMPZ:          return "ARMISD::CMPZ";
    947   case ARMISD::CMPFP:         return "ARMISD::CMPFP";
    948   case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
    949   case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
    950   case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
    951 
    952   case ARMISD::CMOV:          return "ARMISD::CMOV";
    953 
    954   case ARMISD::RBIT:          return "ARMISD::RBIT";
    955 
    956   case ARMISD::FTOSI:         return "ARMISD::FTOSI";
    957   case ARMISD::FTOUI:         return "ARMISD::FTOUI";
    958   case ARMISD::SITOF:         return "ARMISD::SITOF";
    959   case ARMISD::UITOF:         return "ARMISD::UITOF";
    960 
    961   case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
    962   case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
    963   case ARMISD::RRX:           return "ARMISD::RRX";
    964 
    965   case ARMISD::ADDC:          return "ARMISD::ADDC";
    966   case ARMISD::ADDE:          return "ARMISD::ADDE";
    967   case ARMISD::SUBC:          return "ARMISD::SUBC";
    968   case ARMISD::SUBE:          return "ARMISD::SUBE";
    969 
    970   case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
    971   case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";
    972 
    973   case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
    974   case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
    975 
    976   case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";
    977 
    978   case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
    979 
    980   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
    981 
    982   case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
    983   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
    984 
    985   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
    986 
    987   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
    988   case ARMISD::VCEQZ:         return "ARMISD::VCEQZ";
    989   case ARMISD::VCGE:          return "ARMISD::VCGE";
    990   case ARMISD::VCGEZ:         return "ARMISD::VCGEZ";
    991   case ARMISD::VCLEZ:         return "ARMISD::VCLEZ";
    992   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
    993   case ARMISD::VCGT:          return "ARMISD::VCGT";
    994   case ARMISD::VCGTZ:         return "ARMISD::VCGTZ";
    995   case ARMISD::VCLTZ:         return "ARMISD::VCLTZ";
    996   case ARMISD::VCGTU:         return "ARMISD::VCGTU";
    997   case ARMISD::VTST:          return "ARMISD::VTST";
    998 
    999   case ARMISD::VSHL:          return "ARMISD::VSHL";
   1000   case ARMISD::VSHRs:         return "ARMISD::VSHRs";
   1001   case ARMISD::VSHRu:         return "ARMISD::VSHRu";
   1002   case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
   1003   case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
   1004   case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
   1005   case ARMISD::VSHRN:         return "ARMISD::VSHRN";
   1006   case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
   1007   case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
   1008   case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
   1009   case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
   1010   case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
   1011   case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
   1012   case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
   1013   case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
   1014   case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
   1015   case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
   1016   case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
   1017   case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
   1018   case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
   1019   case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
   1020   case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
   1021   case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
   1022   case ARMISD::VMOVFPIMM:     return "ARMISD::VMOVFPIMM";
   1023   case ARMISD::VDUP:          return "ARMISD::VDUP";
   1024   case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
   1025   case ARMISD::VEXT:          return "ARMISD::VEXT";
   1026   case ARMISD::VREV64:        return "ARMISD::VREV64";
   1027   case ARMISD::VREV32:        return "ARMISD::VREV32";
   1028   case ARMISD::VREV16:        return "ARMISD::VREV16";
   1029   case ARMISD::VZIP:          return "ARMISD::VZIP";
   1030   case ARMISD::VUZP:          return "ARMISD::VUZP";
   1031   case ARMISD::VTRN:          return "ARMISD::VTRN";
   1032   case ARMISD::VTBL1:         return "ARMISD::VTBL1";
   1033   case ARMISD::VTBL2:         return "ARMISD::VTBL2";
   1034   case ARMISD::VMULLs:        return "ARMISD::VMULLs";
   1035   case ARMISD::VMULLu:        return "ARMISD::VMULLu";
   1036   case ARMISD::UMLAL:         return "ARMISD::UMLAL";
   1037   case ARMISD::SMLAL:         return "ARMISD::SMLAL";
   1038   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
   1039   case ARMISD::FMAX:          return "ARMISD::FMAX";
   1040   case ARMISD::FMIN:          return "ARMISD::FMIN";
   1041   case ARMISD::BFI:           return "ARMISD::BFI";
   1042   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
   1043   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
   1044   case ARMISD::VBSL:          return "ARMISD::VBSL";
   1045   case ARMISD::VLD2DUP:       return "ARMISD::VLD2DUP";
   1046   case ARMISD::VLD3DUP:       return "ARMISD::VLD3DUP";
   1047   case ARMISD::VLD4DUP:       return "ARMISD::VLD4DUP";
   1048   case ARMISD::VLD1_UPD:      return "ARMISD::VLD1_UPD";
   1049   case ARMISD::VLD2_UPD:      return "ARMISD::VLD2_UPD";
   1050   case ARMISD::VLD3_UPD:      return "ARMISD::VLD3_UPD";
   1051   case ARMISD::VLD4_UPD:      return "ARMISD::VLD4_UPD";
   1052   case ARMISD::VLD2LN_UPD:    return "ARMISD::VLD2LN_UPD";
   1053   case ARMISD::VLD3LN_UPD:    return "ARMISD::VLD3LN_UPD";
   1054   case ARMISD::VLD4LN_UPD:    return "ARMISD::VLD4LN_UPD";
   1055   case ARMISD::VLD2DUP_UPD:   return "ARMISD::VLD2DUP_UPD";
   1056   case ARMISD::VLD3DUP_UPD:   return "ARMISD::VLD3DUP_UPD";
   1057   case ARMISD::VLD4DUP_UPD:   return "ARMISD::VLD4DUP_UPD";
   1058   case ARMISD::VST1_UPD:      return "ARMISD::VST1_UPD";
   1059   case ARMISD::VST2_UPD:      return "ARMISD::VST2_UPD";
   1060   case ARMISD::VST3_UPD:      return "ARMISD::VST3_UPD";
   1061   case ARMISD::VST4_UPD:      return "ARMISD::VST4_UPD";
   1062   case ARMISD::VST2LN_UPD:    return "ARMISD::VST2LN_UPD";
   1063   case ARMISD::VST3LN_UPD:    return "ARMISD::VST3LN_UPD";
   1064   case ARMISD::VST4LN_UPD:    return "ARMISD::VST4LN_UPD";
   1065   }
   1066 }
   1067 
   1068 EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
   1069   if (!VT.isVector()) return getPointerTy();
   1070   return VT.changeVectorElementTypeToInteger();
   1071 }
   1072 
   1073 /// getRegClassFor - Return the register class that should be used for the
   1074 /// specified value type.
   1075 const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
   1076   // Map v4i64 to QQ registers but do not make the type legal. Similarly map
   1077   // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
   1078   // load / store 4 to 8 consecutive D registers.
   1079   if (Subtarget->hasNEON()) {
   1080     if (VT == MVT::v4i64)
   1081       return &ARM::QQPRRegClass;
   1082     if (VT == MVT::v8i64)
   1083       return &ARM::QQQQPRRegClass;
   1084   }
   1085   return TargetLowering::getRegClassFor(VT);
   1086 }
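         // For example, a REG_SEQUENCE that bundles four consecutive D registers
         // for a combined load or store is given the (still illegal) v4i64 type
         // and is assigned to a QQPR register by the mapping above; eight D
         // registers likewise become a v8i64 value in a QQQQPR register.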
   1087 
   1088 // Create a fast isel object.
   1089 FastISel *
   1090 ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
   1091                                   const TargetLibraryInfo *libInfo) const {
   1092   return ARM::createFastISel(funcInfo, libInfo);
   1093 }
   1094 
   1095 /// getMaximalGlobalOffset - Returns the maximal possible offset which can
   1096 /// be used for loads / stores from the global.
   1097 unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
   1098   return (Subtarget->isThumb1Only() ? 127 : 4095);
   1099 }
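         // 4095 matches the 12-bit immediate offset of an ARM-mode LDR/STR;
         // Thumb1 load/store immediates are much more restricted, hence the
         // smaller bound used there.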
   1100 
   1101 Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   1102   unsigned NumVals = N->getNumValues();
   1103   if (!NumVals)
   1104     return Sched::RegPressure;
   1105 
   1106   for (unsigned i = 0; i != NumVals; ++i) {
   1107     EVT VT = N->getValueType(i);
   1108     if (VT == MVT::Glue || VT == MVT::Other)
   1109       continue;
   1110     if (VT.isFloatingPoint() || VT.isVector())
   1111       return Sched::ILP;
   1112   }
   1113 
   1114   if (!N->isMachineOpcode())
   1115     return Sched::RegPressure;
   1116 
    1117   // Loads are scheduled for latency even if the instruction itinerary
   1118   // is not available.
   1119   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   1120   const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
   1121 
   1122   if (MCID.getNumDefs() == 0)
   1123     return Sched::RegPressure;
   1124   if (!Itins->isEmpty() &&
   1125       Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
   1126     return Sched::ILP;
   1127 
   1128   return Sched::RegPressure;
   1129 }
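         // In short: nodes producing floating-point or vector values, and machine
         // nodes whose first result takes more than two cycles to become available
         // (typically loads), are scheduled for ILP to hide latency; everything
         // else falls back to register-pressure scheduling.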
   1130 
   1131 //===----------------------------------------------------------------------===//
   1132 // Lowering Code
   1133 //===----------------------------------------------------------------------===//
   1134 
   1135 /// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
   1136 static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
   1137   switch (CC) {
   1138   default: llvm_unreachable("Unknown condition code!");
   1139   case ISD::SETNE:  return ARMCC::NE;
   1140   case ISD::SETEQ:  return ARMCC::EQ;
   1141   case ISD::SETGT:  return ARMCC::GT;
   1142   case ISD::SETGE:  return ARMCC::GE;
   1143   case ISD::SETLT:  return ARMCC::LT;
   1144   case ISD::SETLE:  return ARMCC::LE;
   1145   case ISD::SETUGT: return ARMCC::HI;
   1146   case ISD::SETUGE: return ARMCC::HS;
   1147   case ISD::SETULT: return ARMCC::LO;
   1148   case ISD::SETULE: return ARMCC::LS;
   1149   }
   1150 }
   1151 
   1152 /// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
   1153 static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
   1154                         ARMCC::CondCodes &CondCode2) {
   1155   CondCode2 = ARMCC::AL;
   1156   switch (CC) {
   1157   default: llvm_unreachable("Unknown FP condition!");
   1158   case ISD::SETEQ:
   1159   case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
   1160   case ISD::SETGT:
   1161   case ISD::SETOGT: CondCode = ARMCC::GT; break;
   1162   case ISD::SETGE:
   1163   case ISD::SETOGE: CondCode = ARMCC::GE; break;
   1164   case ISD::SETOLT: CondCode = ARMCC::MI; break;
   1165   case ISD::SETOLE: CondCode = ARMCC::LS; break;
   1166   case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
   1167   case ISD::SETO:   CondCode = ARMCC::VC; break;
   1168   case ISD::SETUO:  CondCode = ARMCC::VS; break;
   1169   case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
   1170   case ISD::SETUGT: CondCode = ARMCC::HI; break;
   1171   case ISD::SETUGE: CondCode = ARMCC::PL; break;
   1172   case ISD::SETLT:
   1173   case ISD::SETULT: CondCode = ARMCC::LT; break;
   1174   case ISD::SETLE:
   1175   case ISD::SETULE: CondCode = ARMCC::LE; break;
   1176   case ISD::SETNE:
   1177   case ISD::SETUNE: CondCode = ARMCC::NE; break;
   1178   }
   1179 }
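         // Some FP comparisons cannot be expressed with a single ARM condition and
         // therefore set both CondCode and CondCode2. For example SETONE (ordered
         // and not equal) is tested as MI or GT, and SETUEQ (unordered or equal)
         // as EQ or VS; callers emit a second predicated check whenever CondCode2
         // is not AL.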
   1180 
   1181 //===----------------------------------------------------------------------===//
   1182 //                      Calling Convention Implementation
   1183 //===----------------------------------------------------------------------===//
   1184 
   1185 #include "ARMGenCallingConv.inc"
   1186 
    1187 /// CCAssignFnForNode - Selects the correct CCAssignFn for the
   1188 /// given CallingConvention value.
   1189 CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
   1190                                                  bool Return,
   1191                                                  bool isVarArg) const {
   1192   switch (CC) {
   1193   default:
   1194     llvm_unreachable("Unsupported calling convention");
   1195   case CallingConv::Fast:
   1196     if (Subtarget->hasVFP2() && !isVarArg) {
   1197       if (!Subtarget->isAAPCS_ABI())
   1198         return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
    1199       // For AAPCS ABI targets, just use the VFP variant of the calling convention.
   1200       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1201     }
   1202     // Fallthrough
   1203   case CallingConv::C: {
   1204     // Use target triple & subtarget features to do actual dispatch.
   1205     if (!Subtarget->isAAPCS_ABI())
   1206       return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   1207     else if (Subtarget->hasVFP2() &&
   1208              getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
   1209              !isVarArg)
   1210       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1211     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
   1212   }
   1213   case CallingConv::ARM_AAPCS_VFP:
   1214     if (!isVarArg)
   1215       return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
   1216     // Fallthrough
   1217   case CallingConv::ARM_AAPCS:
   1218     return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
   1219   case CallingConv::ARM_APCS:
   1220     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
   1221   case CallingConv::GHC:
   1222     return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
   1223   }
   1224 }
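         // Example: a non-variadic CallingConv::C call on an AAPCS target compiled
         // with a hard-float ABI and VFP2 selects CC_ARM_AAPCS_VFP /
         // RetCC_ARM_AAPCS_VFP, so floating-point arguments and results are passed
         // in VFP registers rather than being split across r0-r3.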
   1225 
   1226 /// LowerCallResult - Lower the result values of a call into the
    1227 /// appropriate copies out of the corresponding physical registers.
   1228 SDValue
   1229 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   1230                                    CallingConv::ID CallConv, bool isVarArg,
   1231                                    const SmallVectorImpl<ISD::InputArg> &Ins,
   1232                                    DebugLoc dl, SelectionDAG &DAG,
   1233                                    SmallVectorImpl<SDValue> &InVals) const {
   1234 
   1235   // Assign locations to each value returned by this call.
   1236   SmallVector<CCValAssign, 16> RVLocs;
   1237   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1238                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
   1239   CCInfo.AnalyzeCallResult(Ins,
   1240                            CCAssignFnForNode(CallConv, /* Return*/ true,
   1241                                              isVarArg));
   1242 
   1243   // Copy all of the result registers out of their specified physreg.
   1244   for (unsigned i = 0; i != RVLocs.size(); ++i) {
   1245     CCValAssign VA = RVLocs[i];
   1246 
   1247     SDValue Val;
   1248     if (VA.needsCustom()) {
   1249       // Handle f64 or half of a v2f64.
   1250       SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
   1251                                       InFlag);
   1252       Chain = Lo.getValue(1);
   1253       InFlag = Lo.getValue(2);
   1254       VA = RVLocs[++i]; // skip ahead to next loc
   1255       SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
   1256                                       InFlag);
   1257       Chain = Hi.getValue(1);
   1258       InFlag = Hi.getValue(2);
   1259       Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   1260 
   1261       if (VA.getLocVT() == MVT::v2f64) {
   1262         SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
   1263         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
   1264                           DAG.getConstant(0, MVT::i32));
   1265 
   1266         VA = RVLocs[++i]; // skip ahead to next loc
   1267         Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
   1268         Chain = Lo.getValue(1);
   1269         InFlag = Lo.getValue(2);
   1270         VA = RVLocs[++i]; // skip ahead to next loc
   1271         Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
   1272         Chain = Hi.getValue(1);
   1273         InFlag = Hi.getValue(2);
   1274         Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   1275         Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
   1276                           DAG.getConstant(1, MVT::i32));
   1277       }
   1278     } else {
   1279       Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
   1280                                InFlag);
   1281       Chain = Val.getValue(1);
   1282       InFlag = Val.getValue(2);
   1283     }
   1284 
   1285     switch (VA.getLocInfo()) {
   1286     default: llvm_unreachable("Unknown loc info!");
   1287     case CCValAssign::Full: break;
   1288     case CCValAssign::BCvt:
   1289       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
   1290       break;
   1291     }
   1292 
   1293     InVals.push_back(Val);
   1294   }
   1295 
   1296   return Chain;
   1297 }
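         // Worked example of the custom case above: an f64 result assigned to
         // r0/r1 is read back with two CopyFromReg nodes and rebuilt with
         // ARMISD::VMOVDRR; a v2f64 result occupies four GPR locations and is
         // reassembled by inserting two such f64 halves into an undef vector.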
   1298 
   1299 /// LowerMemOpCallTo - Store the argument to the stack.
   1300 SDValue
   1301 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
   1302                                     SDValue StackPtr, SDValue Arg,
   1303                                     DebugLoc dl, SelectionDAG &DAG,
   1304                                     const CCValAssign &VA,
   1305                                     ISD::ArgFlagsTy Flags) const {
   1306   unsigned LocMemOffset = VA.getLocMemOffset();
   1307   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   1308   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   1309   return DAG.getStore(Chain, dl, Arg, PtrOff,
   1310                       MachinePointerInfo::getStack(LocMemOffset),
   1311                       false, false, 0);
   1312 }
   1313 
   1314 void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
   1315                                          SDValue Chain, SDValue &Arg,
   1316                                          RegsToPassVector &RegsToPass,
   1317                                          CCValAssign &VA, CCValAssign &NextVA,
   1318                                          SDValue &StackPtr,
   1319                                          SmallVector<SDValue, 8> &MemOpChains,
   1320                                          ISD::ArgFlagsTy Flags) const {
   1321 
   1322   SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
   1323                               DAG.getVTList(MVT::i32, MVT::i32), Arg);
   1324   RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
   1325 
   1326   if (NextVA.isRegLoc())
   1327     RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
   1328   else {
   1329     assert(NextVA.isMemLoc());
   1330     if (StackPtr.getNode() == 0)
   1331       StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
   1332 
   1333     MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
   1334                                            dl, DAG, NextVA,
   1335                                            Flags));
   1336   }
   1337 }
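         // This is the outgoing mirror of the result handling above: the f64 is
         // split into two i32 halves with ARMISD::VMOVRRD, the low half always
         // goes in the assigned register, and the high half goes either into the
         // next register or, if the next location is a stack slot, into memory.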
   1338 
    1339 /// LowerCall - Lower a call into a callseq_start <-
    1340 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
   1341 /// nodes.
   1342 SDValue
   1343 ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   1344                              SmallVectorImpl<SDValue> &InVals) const {
   1345   SelectionDAG &DAG                     = CLI.DAG;
   1346   DebugLoc &dl                          = CLI.DL;
   1347   SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
   1348   SmallVector<SDValue, 32> &OutVals     = CLI.OutVals;
   1349   SmallVector<ISD::InputArg, 32> &Ins   = CLI.Ins;
   1350   SDValue Chain                         = CLI.Chain;
   1351   SDValue Callee                        = CLI.Callee;
   1352   bool &isTailCall                      = CLI.IsTailCall;
   1353   CallingConv::ID CallConv              = CLI.CallConv;
   1354   bool doesNotRet                       = CLI.DoesNotReturn;
   1355   bool isVarArg                         = CLI.IsVarArg;
   1356 
   1357   MachineFunction &MF = DAG.getMachineFunction();
   1358   bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
   1359   bool IsSibCall = false;
   1360   // Disable tail calls if they're not supported.
   1361   if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
   1362     isTailCall = false;
   1363   if (isTailCall) {
   1364     // Check if it's really possible to do a tail call.
   1365     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
   1366                     isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
   1367                                                    Outs, OutVals, Ins, DAG);
   1368     // We don't support GuaranteedTailCallOpt for ARM, only automatically
   1369     // detected sibcalls.
   1370     if (isTailCall) {
   1371       ++NumTailCalls;
   1372       IsSibCall = true;
   1373     }
   1374   }
   1375 
   1376   // Analyze operands of the call, assigning locations to each operand.
   1377   SmallVector<CCValAssign, 16> ArgLocs;
   1378   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1379                  getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
   1380   CCInfo.AnalyzeCallOperands(Outs,
   1381                              CCAssignFnForNode(CallConv, /* Return*/ false,
   1382                                                isVarArg));
   1383 
   1384   // Get a count of how many bytes are to be pushed on the stack.
   1385   unsigned NumBytes = CCInfo.getNextStackOffset();
   1386 
   1387   // For tail calls, memory operands are available in our caller's stack.
   1388   if (IsSibCall)
   1389     NumBytes = 0;
   1390 
   1391   // Adjust the stack pointer for the new arguments...
   1392   // These operations are automatically eliminated by the prolog/epilog pass
   1393   if (!IsSibCall)
   1394     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
   1395 
   1396   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
   1397 
   1398   RegsToPassVector RegsToPass;
   1399   SmallVector<SDValue, 8> MemOpChains;
   1400 
   1401   // Walk the register/memloc assignments, inserting copies/loads.  In the case
   1402   // of tail call optimization, arguments are handled later.
   1403   for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
   1404        i != e;
   1405        ++i, ++realArgIdx) {
   1406     CCValAssign &VA = ArgLocs[i];
   1407     SDValue Arg = OutVals[realArgIdx];
   1408     ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
   1409     bool isByVal = Flags.isByVal();
   1410 
   1411     // Promote the value if needed.
   1412     switch (VA.getLocInfo()) {
   1413     default: llvm_unreachable("Unknown loc info!");
   1414     case CCValAssign::Full: break;
   1415     case CCValAssign::SExt:
   1416       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
   1417       break;
   1418     case CCValAssign::ZExt:
   1419       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
   1420       break;
   1421     case CCValAssign::AExt:
   1422       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
   1423       break;
   1424     case CCValAssign::BCvt:
   1425       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   1426       break;
   1427     }
   1428 
   1429     // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
   1430     if (VA.needsCustom()) {
   1431       if (VA.getLocVT() == MVT::v2f64) {
   1432         SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1433                                   DAG.getConstant(0, MVT::i32));
   1434         SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1435                                   DAG.getConstant(1, MVT::i32));
   1436 
   1437         PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
   1438                          VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
   1439 
   1440         VA = ArgLocs[++i]; // skip ahead to next loc
   1441         if (VA.isRegLoc()) {
   1442           PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
   1443                            VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
   1444         } else {
   1445           assert(VA.isMemLoc());
   1446 
   1447           MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
   1448                                                  dl, DAG, VA, Flags));
   1449         }
   1450       } else {
   1451         PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
   1452                          StackPtr, MemOpChains, Flags);
   1453       }
   1454     } else if (VA.isRegLoc()) {
   1455       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
   1456     } else if (isByVal) {
   1457       assert(VA.isMemLoc());
   1458       unsigned offset = 0;
   1459 
   1460       // True if this byval aggregate will be split between registers
   1461       // and memory.
   1462       if (CCInfo.isFirstByValRegValid()) {
   1463         EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   1464         unsigned int i, j;
   1465         for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) {
   1466           SDValue Const = DAG.getConstant(4*i, MVT::i32);
   1467           SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
   1468           SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
   1469                                      MachinePointerInfo(),
   1470                                      false, false, false, 0);
   1471           MemOpChains.push_back(Load.getValue(1));
   1472           RegsToPass.push_back(std::make_pair(j, Load));
   1473         }
   1474         offset = ARM::R4 - CCInfo.getFirstByValReg();
   1475         CCInfo.clearFirstByValReg();
   1476       }
   1477 
   1478       if (Flags.getByValSize() - 4*offset > 0) {
   1479         unsigned LocMemOffset = VA.getLocMemOffset();
   1480         SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
   1481         SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
   1482                                   StkPtrOff);
   1483         SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
   1484         SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
   1485         SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
   1486                                            MVT::i32);
   1487         SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
   1488 
   1489         SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
   1490         SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
   1491         MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
   1492                                           Ops, array_lengthof(Ops)));
   1493       }
   1494     } else if (!IsSibCall) {
   1495       assert(VA.isMemLoc());
   1496 
   1497       MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
   1498                                              dl, DAG, VA, Flags));
   1499     }
   1500   }
   1501 
   1502   if (!MemOpChains.empty())
   1503     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   1504                         &MemOpChains[0], MemOpChains.size());
   1505 
   1506   // Build a sequence of copy-to-reg nodes chained together with token chain
   1507   // and flag operands which copy the outgoing args into the appropriate regs.
   1508   SDValue InFlag;
    1509   // Tail call byval lowering might overwrite argument registers, so in the case
    1510   // of tail call optimization the copies to registers are lowered later.
   1511   if (!isTailCall)
   1512     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   1513       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   1514                                RegsToPass[i].second, InFlag);
   1515       InFlag = Chain.getValue(1);
   1516     }
   1517 
   1518   // For tail calls lower the arguments to the 'real' stack slot.
   1519   if (isTailCall) {
   1520     // Force all the incoming stack arguments to be loaded from the stack
   1521     // before any new outgoing arguments are stored to the stack, because the
   1522     // outgoing stack slots may alias the incoming argument stack slots, and
   1523     // the alias isn't otherwise explicit. This is slightly more conservative
   1524     // than necessary, because it means that each store effectively depends
   1525     // on every argument instead of just those arguments it would clobber.
   1526 
    1527     // Do not glue the preceding CopyToReg nodes together with the following nodes.
   1528     InFlag = SDValue();
   1529     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   1530       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   1531                                RegsToPass[i].second, InFlag);
   1532       InFlag = Chain.getValue(1);
   1533     }
    1534     InFlag = SDValue();
   1535   }
   1536 
   1537   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
   1538   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
   1539   // node so that legalize doesn't hack it.
   1540   bool isDirect = false;
   1541   bool isARMFunc = false;
   1542   bool isLocalARMFunc = false;
   1543   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   1544 
   1545   if (EnableARMLongCalls) {
   1546     assert (getTargetMachine().getRelocationModel() == Reloc::Static
   1547             && "long-calls with non-static relocation model!");
   1548     // Handle a global address or an external symbol. If it's not one of
   1549     // those, the target's already in a register, so we don't need to do
   1550     // anything extra.
   1551     if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   1552       const GlobalValue *GV = G->getGlobal();
   1553       // Create a constant pool entry for the callee address
   1554       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1555       ARMConstantPoolValue *CPV =
   1556         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
   1557 
   1558       // Get the address of the callee into a register
   1559       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1560       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1561       Callee = DAG.getLoad(getPointerTy(), dl,
   1562                            DAG.getEntryNode(), CPAddr,
   1563                            MachinePointerInfo::getConstantPool(),
   1564                            false, false, false, 0);
   1565     } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
   1566       const char *Sym = S->getSymbol();
   1567 
   1568       // Create a constant pool entry for the callee address
   1569       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1570       ARMConstantPoolValue *CPV =
   1571         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
   1572                                       ARMPCLabelIndex, 0);
   1573       // Get the address of the callee into a register
   1574       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1575       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1576       Callee = DAG.getLoad(getPointerTy(), dl,
   1577                            DAG.getEntryNode(), CPAddr,
   1578                            MachinePointerInfo::getConstantPool(),
   1579                            false, false, false, 0);
   1580     }
   1581   } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   1582     const GlobalValue *GV = G->getGlobal();
   1583     isDirect = true;
   1584     bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
   1585     bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
   1586                    getTargetMachine().getRelocationModel() != Reloc::Static;
   1587     isARMFunc = !Subtarget->isThumb() || isStub;
   1588     // ARM call to a local ARM function is predicable.
   1589     isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
   1590     // tBX takes a register source operand.
   1591     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
   1592       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1593       ARMConstantPoolValue *CPV =
   1594         ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4);
   1595       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1596       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1597       Callee = DAG.getLoad(getPointerTy(), dl,
   1598                            DAG.getEntryNode(), CPAddr,
   1599                            MachinePointerInfo::getConstantPool(),
   1600                            false, false, false, 0);
   1601       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   1602       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
   1603                            getPointerTy(), Callee, PICLabel);
   1604     } else {
   1605       // On ELF targets for PIC code, direct calls should go through the PLT
   1606       unsigned OpFlags = 0;
   1607       if (Subtarget->isTargetELF() &&
   1608           getTargetMachine().getRelocationModel() == Reloc::PIC_)
   1609         OpFlags = ARMII::MO_PLT;
   1610       Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
   1611     }
   1612   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
   1613     isDirect = true;
   1614     bool isStub = Subtarget->isTargetDarwin() &&
   1615                   getTargetMachine().getRelocationModel() != Reloc::Static;
   1616     isARMFunc = !Subtarget->isThumb() || isStub;
   1617     // tBX takes a register source operand.
   1618     const char *Sym = S->getSymbol();
   1619     if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
   1620       unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   1621       ARMConstantPoolValue *CPV =
   1622         ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
   1623                                       ARMPCLabelIndex, 4);
   1624       SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
   1625       CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   1626       Callee = DAG.getLoad(getPointerTy(), dl,
   1627                            DAG.getEntryNode(), CPAddr,
   1628                            MachinePointerInfo::getConstantPool(),
   1629                            false, false, false, 0);
   1630       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   1631       Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
   1632                            getPointerTy(), Callee, PICLabel);
   1633     } else {
   1634       unsigned OpFlags = 0;
   1635       // On ELF targets for PIC code, direct calls should go through the PLT
   1636       if (Subtarget->isTargetELF() &&
   1637                   getTargetMachine().getRelocationModel() == Reloc::PIC_)
   1638         OpFlags = ARMII::MO_PLT;
   1639       Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
   1640     }
   1641   }
   1642 
   1643   // FIXME: handle tail calls differently.
   1644   unsigned CallOpc;
   1645   bool HasMinSizeAttr = MF.getFunction()->getAttributes().
   1646     hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
   1647   if (Subtarget->isThumb()) {
   1648     if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
   1649       CallOpc = ARMISD::CALL_NOLINK;
   1650     else
   1651       CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
   1652   } else {
   1653     if (!isDirect && !Subtarget->hasV5TOps())
   1654       CallOpc = ARMISD::CALL_NOLINK;
   1655     else if (doesNotRet && isDirect && Subtarget->hasRAS() &&
   1656                // Emit regular call when code size is the priority
   1657                !HasMinSizeAttr)
   1658       // "mov lr, pc; b _foo" to avoid confusing the RSP
   1659       CallOpc = ARMISD::CALL_NOLINK;
   1660     else
   1661       CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
   1662   }
   1663 
   1664   std::vector<SDValue> Ops;
   1665   Ops.push_back(Chain);
   1666   Ops.push_back(Callee);
   1667 
   1668   // Add argument registers to the end of the list so that they are known live
   1669   // into the call.
   1670   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
   1671     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
   1672                                   RegsToPass[i].second.getValueType()));
   1673 
   1674   // Add a register mask operand representing the call-preserved registers.
   1675   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
   1676   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
   1677   assert(Mask && "Missing call preserved mask for calling convention");
   1678   Ops.push_back(DAG.getRegisterMask(Mask));
   1679 
   1680   if (InFlag.getNode())
   1681     Ops.push_back(InFlag);
   1682 
   1683   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   1684   if (isTailCall)
   1685     return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
   1686 
   1687   // Returns a chain and a flag for retval copy to use.
   1688   Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
   1689   InFlag = Chain.getValue(1);
   1690 
   1691   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
   1692                              DAG.getIntPtrConstant(0, true), InFlag);
   1693   if (!Ins.empty())
   1694     InFlag = Chain.getValue(1);
   1695 
   1696   // Handle result values, copying them out of physregs into vregs that we
   1697   // return.
   1698   return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
   1699                          dl, DAG, InVals);
   1700 }
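         // Overall shape of LowerCall: CALLSEQ_START, copies/stores for the
         // outgoing arguments (with byval data split between registers and an
         // ARMISD::COPY_STRUCT_BYVAL for the remainder), resolution of the callee
         // into a direct, PLT, long-call or indirect form, the ARMISD::CALL* (or
         // TC_RETURN) node itself, then CALLSEQ_END and LowerCallResult for the
         // returned values.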
   1701 
   1702 /// HandleByVal - Every parameter *after* a byval parameter is passed
   1703 /// on the stack.  Remember the next parameter register to allocate,
    1704 /// and then confiscate the rest of the parameter registers to ensure
   1705 /// this.
   1706 void
   1707 ARMTargetLowering::HandleByVal(
   1708     CCState *State, unsigned &size, unsigned Align) const {
   1709   unsigned reg = State->AllocateReg(GPRArgRegs, 4);
   1710   assert((State->getCallOrPrologue() == Prologue ||
   1711           State->getCallOrPrologue() == Call) &&
   1712          "unhandled ParmContext");
   1713   if ((!State->isFirstByValRegValid()) &&
   1714       (ARM::R0 <= reg) && (reg <= ARM::R3)) {
   1715     if (Subtarget->isAAPCS_ABI() && Align > 4) {
   1716       unsigned AlignInRegs = Align / 4;
   1717       unsigned Waste = (ARM::R4 - reg) % AlignInRegs;
   1718       for (unsigned i = 0; i < Waste; ++i)
   1719         reg = State->AllocateReg(GPRArgRegs, 4);
   1720     }
   1721     if (reg != 0) {
   1722       State->setFirstByValReg(reg);
   1723       // At a call site, a byval parameter that is split between
   1724       // registers and memory needs its size truncated here.  In a
   1725       // function prologue, such byval parameters are reassembled in
   1726       // memory, and are not truncated.
   1727       if (State->getCallOrPrologue() == Call) {
   1728         unsigned excess = 4 * (ARM::R4 - reg);
   1729         assert(size >= excess && "expected larger existing stack allocation");
   1730         size -= excess;
   1731       }
   1732     }
   1733   }
   1734   // Confiscate any remaining parameter registers to preclude their
   1735   // assignment to subsequent parameters.
   1736   while (State->AllocateReg(GPRArgRegs, 4))
   1737     ;
   1738 }
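         // Worked example for a call site: a 16-byte byval whose first free
         // register is r2 occupies r2-r3 plus 8 bytes of stack. The excess is
         // 4 * (R4 - r2) = 8, so the reported size shrinks to the 8 bytes that
         // actually live on the stack, and whatever remains of r0-r3 is then
         // confiscated so no later parameter can claim a register.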
   1739 
   1740 /// MatchingStackOffset - Return true if the given stack call argument is
    1741 /// already available at the same (relative) position in the caller's
   1742 /// incoming argument stack.
   1743 static
   1744 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
   1745                          MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
   1746                          const TargetInstrInfo *TII) {
   1747   unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
   1748   int FI = INT_MAX;
   1749   if (Arg.getOpcode() == ISD::CopyFromReg) {
   1750     unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
   1751     if (!TargetRegisterInfo::isVirtualRegister(VR))
   1752       return false;
   1753     MachineInstr *Def = MRI->getVRegDef(VR);
   1754     if (!Def)
   1755       return false;
   1756     if (!Flags.isByVal()) {
   1757       if (!TII->isLoadFromStackSlot(Def, FI))
   1758         return false;
   1759     } else {
   1760       return false;
   1761     }
   1762   } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
   1763     if (Flags.isByVal())
   1764       // ByVal argument is passed in as a pointer but it's now being
   1765       // dereferenced. e.g.
   1766       // define @foo(%struct.X* %A) {
   1767       //   tail call @bar(%struct.X* byval %A)
   1768       // }
   1769       return false;
   1770     SDValue Ptr = Ld->getBasePtr();
   1771     FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
   1772     if (!FINode)
   1773       return false;
   1774     FI = FINode->getIndex();
   1775   } else
   1776     return false;
   1777 
   1778   assert(FI != INT_MAX);
   1779   if (!MFI->isFixedObjectIndex(FI))
   1780     return false;
   1781   return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
   1782 }
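         // This is what lets a sibcall reuse a stack argument in place: the
         // outgoing value must be a load from a fixed frame object (possibly
         // reaching here through a CopyFromReg of a virtual register defined by
         // such a load) whose offset and size match the outgoing slot, and byval
         // arguments never match.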
   1783 
   1784 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
   1785 /// for tail call optimization. Targets which want to do tail call
   1786 /// optimization should implement this function.
   1787 bool
   1788 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   1789                                                      CallingConv::ID CalleeCC,
   1790                                                      bool isVarArg,
   1791                                                      bool isCalleeStructRet,
   1792                                                      bool isCallerStructRet,
   1793                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
   1794                                     const SmallVectorImpl<SDValue> &OutVals,
   1795                                     const SmallVectorImpl<ISD::InputArg> &Ins,
   1796                                                      SelectionDAG& DAG) const {
   1797   const Function *CallerF = DAG.getMachineFunction().getFunction();
   1798   CallingConv::ID CallerCC = CallerF->getCallingConv();
   1799   bool CCMatch = CallerCC == CalleeCC;
   1800 
   1801   // Look for obvious safe cases to perform tail call optimization that do not
    1802   // require ABI changes. This is what gcc calls a sibcall.
   1803 
   1804   // Do not sibcall optimize vararg calls unless the call site is not passing
   1805   // any arguments.
   1806   if (isVarArg && !Outs.empty())
   1807     return false;
   1808 
   1809   // Also avoid sibcall optimization if either caller or callee uses struct
   1810   // return semantics.
   1811   if (isCalleeStructRet || isCallerStructRet)
   1812     return false;
   1813 
   1814   // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
   1815   // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
   1816   // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
   1817   // support in the assembler and linker to be used. This would need to be
   1818   // fixed to fully support tail calls in Thumb1.
   1819   //
   1820   // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
    1821   // LR.  This means if we need to reload LR, it takes an extra instruction,
   1822   // which outweighs the value of the tail call; but here we don't know yet
   1823   // whether LR is going to be used.  Probably the right approach is to
   1824   // generate the tail call here and turn it back into CALL/RET in
   1825   // emitEpilogue if LR is used.
   1826 
   1827   // Thumb1 PIC calls to external symbols use BX, so they can be tail calls,
   1828   // but we need to make sure there are enough registers; the only valid
   1829   // registers are the 4 used for parameters.  We don't currently do this
   1830   // case.
   1831   if (Subtarget->isThumb1Only())
   1832     return false;
   1833 
   1834   // If the calling conventions do not match, then we'd better make sure the
   1835   // results are returned in the same way as what the caller expects.
   1836   if (!CCMatch) {
   1837     SmallVector<CCValAssign, 16> RVLocs1;
   1838     ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
   1839                        getTargetMachine(), RVLocs1, *DAG.getContext(), Call);
   1840     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg));
   1841 
   1842     SmallVector<CCValAssign, 16> RVLocs2;
   1843     ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
   1844                        getTargetMachine(), RVLocs2, *DAG.getContext(), Call);
   1845     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg));
   1846 
   1847     if (RVLocs1.size() != RVLocs2.size())
   1848       return false;
   1849     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
   1850       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
   1851         return false;
   1852       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
   1853         return false;
   1854       if (RVLocs1[i].isRegLoc()) {
   1855         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
   1856           return false;
   1857       } else {
   1858         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
   1859           return false;
   1860       }
   1861     }
   1862   }
   1863 
   1864   // If Caller's vararg or byval argument has been split between registers and
   1865   // stack, do not perform tail call, since part of the argument is in caller's
   1866   // local frame.
   1867   const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction().
   1868                                       getInfo<ARMFunctionInfo>();
   1869   if (AFI_Caller->getVarArgsRegSaveSize())
   1870     return false;
   1871 
   1872   // If the callee takes no arguments then go on to check the results of the
   1873   // call.
   1874   if (!Outs.empty()) {
   1875     // Check if stack adjustment is needed. For now, do not do this if any
   1876     // argument is passed on the stack.
   1877     SmallVector<CCValAssign, 16> ArgLocs;
   1878     ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(),
   1879                       getTargetMachine(), ArgLocs, *DAG.getContext(), Call);
   1880     CCInfo.AnalyzeCallOperands(Outs,
   1881                                CCAssignFnForNode(CalleeCC, false, isVarArg));
   1882     if (CCInfo.getNextStackOffset()) {
   1883       MachineFunction &MF = DAG.getMachineFunction();
   1884 
    1885       // Check if the arguments are already laid out in the same way as
   1886       // the caller's fixed stack objects.
   1887       MachineFrameInfo *MFI = MF.getFrameInfo();
   1888       const MachineRegisterInfo *MRI = &MF.getRegInfo();
   1889       const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   1890       for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
   1891            i != e;
   1892            ++i, ++realArgIdx) {
   1893         CCValAssign &VA = ArgLocs[i];
   1894         EVT RegVT = VA.getLocVT();
   1895         SDValue Arg = OutVals[realArgIdx];
   1896         ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
   1897         if (VA.getLocInfo() == CCValAssign::Indirect)
   1898           return false;
   1899         if (VA.needsCustom()) {
   1900           // f64 and vector types are split into multiple registers or
   1901           // register/stack-slot combinations.  The types will not match
   1902           // the registers; give up on memory f64 refs until we figure
   1903           // out what to do about this.
   1904           if (!VA.isRegLoc())
   1905             return false;
   1906           if (!ArgLocs[++i].isRegLoc())
   1907             return false;
   1908           if (RegVT == MVT::v2f64) {
   1909             if (!ArgLocs[++i].isRegLoc())
   1910               return false;
   1911             if (!ArgLocs[++i].isRegLoc())
   1912               return false;
   1913           }
   1914         } else if (!VA.isRegLoc()) {
   1915           if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
   1916                                    MFI, MRI, TII))
   1917             return false;
   1918         }
   1919       }
   1920     }
   1921   }
   1922 
   1923   return true;
   1924 }
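         // Summary of the rejections above: variadic calls that pass arguments,
         // struct-return on either side, Thumb1, calling-convention mismatches
         // whose return values land in different locations, callers with split
         // vararg/byval state in their own frame, and stack-passed arguments that
         // are not already sitting in the matching incoming slots.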
   1925 
   1926 bool
   1927 ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
   1928                                   MachineFunction &MF, bool isVarArg,
   1929                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
   1930                                   LLVMContext &Context) const {
   1931   SmallVector<CCValAssign, 16> RVLocs;
   1932   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context);
   1933   return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
   1934                                                     isVarArg));
   1935 }
   1936 
   1937 SDValue
   1938 ARMTargetLowering::LowerReturn(SDValue Chain,
   1939                                CallingConv::ID CallConv, bool isVarArg,
   1940                                const SmallVectorImpl<ISD::OutputArg> &Outs,
   1941                                const SmallVectorImpl<SDValue> &OutVals,
   1942                                DebugLoc dl, SelectionDAG &DAG) const {
   1943 
   1944   // CCValAssign - represent the assignment of the return value to a location.
   1945   SmallVector<CCValAssign, 16> RVLocs;
   1946 
   1947   // CCState - Info about the registers and stack slots.
   1948   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1949                     getTargetMachine(), RVLocs, *DAG.getContext(), Call);
   1950 
   1951   // Analyze outgoing return values.
   1952   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
   1953                                                isVarArg));
   1954 
   1955   SDValue Flag;
   1956   SmallVector<SDValue, 4> RetOps;
   1957   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
   1958 
   1959   // Copy the result values into the output registers.
   1960   for (unsigned i = 0, realRVLocIdx = 0;
   1961        i != RVLocs.size();
   1962        ++i, ++realRVLocIdx) {
   1963     CCValAssign &VA = RVLocs[i];
   1964     assert(VA.isRegLoc() && "Can only return in registers!");
   1965 
   1966     SDValue Arg = OutVals[realRVLocIdx];
   1967 
   1968     switch (VA.getLocInfo()) {
   1969     default: llvm_unreachable("Unknown loc info!");
   1970     case CCValAssign::Full: break;
   1971     case CCValAssign::BCvt:
   1972       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   1973       break;
   1974     }
   1975 
   1976     if (VA.needsCustom()) {
   1977       if (VA.getLocVT() == MVT::v2f64) {
   1978         // Extract the first half and return it in two registers.
   1979         SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1980                                    DAG.getConstant(0, MVT::i32));
   1981         SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
   1982                                        DAG.getVTList(MVT::i32, MVT::i32), Half);
   1983 
   1984         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
   1985         Flag = Chain.getValue(1);
   1986         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   1987         VA = RVLocs[++i]; // skip ahead to next loc
   1988         Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
   1989                                  HalfGPRs.getValue(1), Flag);
   1990         Flag = Chain.getValue(1);
   1991         RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   1992         VA = RVLocs[++i]; // skip ahead to next loc
   1993 
   1994         // Extract the 2nd half and fall through to handle it as an f64 value.
   1995         Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
   1996                           DAG.getConstant(1, MVT::i32));
   1997       }
   1998       // Legalize ret f64 -> ret 2 x i32.  We always have fmrrd if f64 is
   1999       // available.
   2000       SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
   2001                                   DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
   2002       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
   2003       Flag = Chain.getValue(1);
   2004       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2005       VA = RVLocs[++i]; // skip ahead to next loc
   2006       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1),
   2007                                Flag);
   2008     } else
   2009       Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
   2010 
    2011     // Guarantee that all emitted copies are glued together,
    2012     // so nothing can be scheduled in between them.
   2013     Flag = Chain.getValue(1);
   2014     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2015   }
   2016 
   2017   // Update chain and glue.
   2018   RetOps[0] = Chain;
   2019   if (Flag.getNode())
   2020     RetOps.push_back(Flag);
   2021 
   2022   return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other,
   2023                      RetOps.data(), RetOps.size());
   2024 }
   2025 
   2026 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
   2027   if (N->getNumValues() != 1)
   2028     return false;
   2029   if (!N->hasNUsesOfValue(1, 0))
   2030     return false;
   2031 
   2032   SDValue TCChain = Chain;
   2033   SDNode *Copy = *N->use_begin();
   2034   if (Copy->getOpcode() == ISD::CopyToReg) {
   2035     // If the copy has a glue operand, we conservatively assume it isn't safe to
   2036     // perform a tail call.
   2037     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
   2038       return false;
   2039     TCChain = Copy->getOperand(0);
   2040   } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
   2041     SDNode *VMov = Copy;
   2042     // f64 returned in a pair of GPRs.
   2043     SmallPtrSet<SDNode*, 2> Copies;
   2044     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
   2045          UI != UE; ++UI) {
   2046       if (UI->getOpcode() != ISD::CopyToReg)
   2047         return false;
   2048       Copies.insert(*UI);
   2049     }
   2050     if (Copies.size() > 2)
   2051       return false;
   2052 
   2053     for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
   2054          UI != UE; ++UI) {
   2055       SDValue UseChain = UI->getOperand(0);
   2056       if (Copies.count(UseChain.getNode()))
   2057         // Second CopyToReg
   2058         Copy = *UI;
   2059       else
   2060         // First CopyToReg
   2061         TCChain = UseChain;
   2062     }
   2063   } else if (Copy->getOpcode() == ISD::BITCAST) {
   2064     // f32 returned in a single GPR.
   2065     if (!Copy->hasOneUse())
   2066       return false;
   2067     Copy = *Copy->use_begin();
   2068     if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
   2069       return false;
   2070     Chain = Copy->getOperand(0);
   2071   } else {
   2072     return false;
   2073   }
   2074 
   2075   bool HasRet = false;
   2076   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
   2077        UI != UE; ++UI) {
   2078     if (UI->getOpcode() != ARMISD::RET_FLAG)
   2079       return false;
   2080     HasRet = true;
   2081   }
   2082 
   2083   if (!HasRet)
   2084     return false;
   2085 
   2086   Chain = TCChain;
   2087   return true;
   2088 }
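         // The accepted shapes mirror LowerReturn: a value feeding a single
         // CopyToReg, an f64 returned through a VMOVRRD whose halves feed at most
         // two CopyToReg nodes, or an f32 returned through a BITCAST to i32; in
         // every case each copy must flow directly into ARMISD::RET_FLAG.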
   2089 
   2090 bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
   2091   if (!EnableARMTailCalls && !Subtarget->supportsTailCall())
   2092     return false;
   2093 
   2094   if (!CI->isTailCall())
   2095     return false;
   2096 
   2097   return !Subtarget->isThumb1Only();
   2098 }
   2099 
   2100 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
   2101 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
   2102 // one of the above mentioned nodes. It has to be wrapped because otherwise
   2103 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
   2104 // be used to form addressing mode. These wrapped nodes will be selected
   2105 // into MOVi.
   2106 static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
   2107   EVT PtrVT = Op.getValueType();
   2108   // FIXME there is no actual debug info here
   2109   DebugLoc dl = Op.getDebugLoc();
   2110   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
   2111   SDValue Res;
   2112   if (CP->isMachineConstantPoolEntry())
   2113     Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
   2114                                     CP->getAlignment());
   2115   else
   2116     Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
   2117                                     CP->getAlignment());
   2118   return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
   2119 }
   2120 
   2121 unsigned ARMTargetLowering::getJumpTableEncoding() const {
   2122   return MachineJumpTableInfo::EK_Inline;
   2123 }
   2124 
   2125 SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
   2126                                              SelectionDAG &DAG) const {
   2127   MachineFunction &MF = DAG.getMachineFunction();
   2128   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2129   unsigned ARMPCLabelIndex = 0;
   2130   DebugLoc DL = Op.getDebugLoc();
   2131   EVT PtrVT = getPointerTy();
   2132   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   2133   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2134   SDValue CPAddr;
   2135   if (RelocM == Reloc::Static) {
   2136     CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
   2137   } else {
   2138     unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
   2139     ARMPCLabelIndex = AFI->createPICLabelUId();
   2140     ARMConstantPoolValue *CPV =
   2141       ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
   2142                                       ARMCP::CPBlockAddress, PCAdj);
   2143     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2144   }
   2145   CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
   2146   SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr,
   2147                                MachinePointerInfo::getConstantPool(),
   2148                                false, false, false, 0);
   2149   if (RelocM == Reloc::Static)
   2150     return Result;
   2151   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2152   return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
   2153 }
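         // Under PIC the constant-pool entry holds the block address relative to
         // the PIC label, and PCAdj (4 in Thumb, 8 in ARM) compensates for how far
         // ahead the PC reads when ARMISD::PIC_ADD adds it back in.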
   2154 
   2155 // Lower ISD::GlobalTLSAddress using the "general dynamic" model
   2156 SDValue
   2157 ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   2158                                                  SelectionDAG &DAG) const {
   2159   DebugLoc dl = GA->getDebugLoc();
   2160   EVT PtrVT = getPointerTy();
   2161   unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   2162   MachineFunction &MF = DAG.getMachineFunction();
   2163   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2164   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2165   ARMConstantPoolValue *CPV =
   2166     ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
   2167                                     ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
   2168   SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2169   Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
   2170   Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument,
   2171                          MachinePointerInfo::getConstantPool(),
   2172                          false, false, false, 0);
   2173   SDValue Chain = Argument.getValue(1);
   2174 
   2175   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2176   Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
   2177 
   2178   // call __tls_get_addr.
   2179   ArgListTy Args;
   2180   ArgListEntry Entry;
   2181   Entry.Node = Argument;
   2182   Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
   2183   Args.push_back(Entry);
   2184   // FIXME: is there useful debug info available here?
   2185   TargetLowering::CallLoweringInfo CLI(Chain,
   2186                 (Type *) Type::getInt32Ty(*DAG.getContext()),
   2187                 false, false, false, false,
   2188                 0, CallingConv::C, /*isTailCall=*/false,
   2189                 /*doesNotRet=*/false, /*isReturnValueUsed=*/true,
   2190                 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
   2191   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
   2192   return CallResult.first;
   2193 }
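         // In the general-dynamic model the PC-relative constant-pool entry above
         // resolves, after the PIC add, to the argument __tls_get_addr expects
         // (conventionally the address of a linker-provided module-ID/offset
         // pair), and the runtime call returns the variable's address as an
         // ordinary i32 result under CallingConv::C.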
   2194 
   2195 // Lower ISD::GlobalTLSAddress using the "initial exec" or
   2196 // "local exec" model.
   2197 SDValue
   2198 ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
   2199                                         SelectionDAG &DAG,
   2200                                         TLSModel::Model model) const {
   2201   const GlobalValue *GV = GA->getGlobal();
   2202   DebugLoc dl = GA->getDebugLoc();
   2203   SDValue Offset;
   2204   SDValue Chain = DAG.getEntryNode();
   2205   EVT PtrVT = getPointerTy();
   2206   // Get the Thread Pointer
   2207   SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
   2208 
   2209   if (model == TLSModel::InitialExec) {
   2210     MachineFunction &MF = DAG.getMachineFunction();
   2211     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2212     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2213     // Initial exec model.
   2214     unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
   2215     ARMConstantPoolValue *CPV =
   2216       ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
   2217                                       ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
   2218                                       true);
   2219     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2220     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
   2221     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2222                          MachinePointerInfo::getConstantPool(),
   2223                          false, false, false, 0);
   2224     Chain = Offset.getValue(1);
   2225 
   2226     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2227     Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
   2228 
   2229     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2230                          MachinePointerInfo::getConstantPool(),
   2231                          false, false, false, 0);
   2232   } else {
   2233     // local exec model
   2234     assert(model == TLSModel::LocalExec);
   2235     ARMConstantPoolValue *CPV =
   2236       ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
   2237     Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2238     Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
   2239     Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
   2240                          MachinePointerInfo::getConstantPool(),
   2241                          false, false, false, 0);
   2242   }
   2243 
   2244   // The address of the thread local variable is the add of the thread
   2245   // pointer with the offset of the variable.
   2246   return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
   2247 }
   2248 
   2249 SDValue
   2250 ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
   2251   // TODO: implement the "local dynamic" model
   2252   assert(Subtarget->isTargetELF() &&
   2253          "TLS not implemented for non-ELF targets");
   2254   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   2255 
   2256   TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
   2257 
   2258   switch (model) {
   2259     case TLSModel::GeneralDynamic:
   2260     case TLSModel::LocalDynamic:
   2261       return LowerToTLSGeneralDynamicModel(GA, DAG);
   2262     case TLSModel::InitialExec:
   2263     case TLSModel::LocalExec:
   2264       return LowerToTLSExecModels(GA, DAG, model);
   2265   }
   2266   llvm_unreachable("bogus TLS model");
   2267 }
   2268 
   2269 SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
   2270                                                  SelectionDAG &DAG) const {
   2271   EVT PtrVT = getPointerTy();
   2272   DebugLoc dl = Op.getDebugLoc();
   2273   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   2274   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
   2275     bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility();
   2276     ARMConstantPoolValue *CPV =
   2277       ARMConstantPoolConstant::Create(GV,
   2278                                       UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT);
   2279     SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2280     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2281     SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
   2282                                  CPAddr,
   2283                                  MachinePointerInfo::getConstantPool(),
   2284                                  false, false, false, 0);
   2285     SDValue Chain = Result.getValue(1);
   2286     SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
   2287     Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT);
   2288     if (!UseGOTOFF)
   2289       Result = DAG.getLoad(PtrVT, dl, Chain, Result,
   2290                            MachinePointerInfo::getGOT(),
   2291                            false, false, false, 0);
   2292     return Result;
   2293   }
   2294 
   2295   // If we have T2 ops, we can materialize the address directly via movt/movw
   2296   // pair. This is always cheaper.
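           // For instance (with an illustrative symbol name), a global g becomes
           // roughly:
           //     movw  r0, :lower16:g
           //     movt  r0, :upper16:g
           // rather than a load from the constant pool.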
   2297   if (Subtarget->useMovt()) {
   2298     ++NumMovwMovt;
   2299     // FIXME: Once remat is capable of dealing with instructions with register
   2300     // operands, expand this into two nodes.
   2301     return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
   2302                        DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   2303   } else {
   2304     SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
   2305     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2306     return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2307                        MachinePointerInfo::getConstantPool(),
   2308                        false, false, false, 0);
   2309   }
   2310 }
   2311 
   2312 SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
   2313                                                     SelectionDAG &DAG) const {
   2314   EVT PtrVT = getPointerTy();
   2315   DebugLoc dl = Op.getDebugLoc();
   2316   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
   2317   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2318 
   2319   // FIXME: Enable this for static codegen when tool issues are fixed.  Also
   2320   // update ARMFastISel::ARMMaterializeGV.
   2321   if (Subtarget->useMovt() && RelocM != Reloc::Static) {
   2322     ++NumMovwMovt;
   2323     // FIXME: Once remat is capable of dealing with instructions with register
   2324     // operands, expand this into two nodes.
   2328 
   2329     unsigned Wrapper = (RelocM == Reloc::PIC_)
   2330       ? ARMISD::WrapperPIC : ARMISD::WrapperDYN;
   2331     SDValue Result = DAG.getNode(Wrapper, dl, PtrVT,
   2332                                  DAG.getTargetGlobalAddress(GV, dl, PtrVT));
   2333     if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
   2334       Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
   2335                            MachinePointerInfo::getGOT(),
   2336                            false, false, false, 0);
   2337     return Result;
   2338   }
   2339 
   2340   unsigned ARMPCLabelIndex = 0;
   2341   SDValue CPAddr;
   2342   if (RelocM == Reloc::Static) {
   2343     CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
   2344   } else {
   2345     ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
   2346     ARMPCLabelIndex = AFI->createPICLabelUId();
   2347     unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
   2348     ARMConstantPoolValue *CPV =
   2349       ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue,
   2350                                       PCAdj);
   2351     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2352   }
   2353   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2354 
   2355   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2356                                MachinePointerInfo::getConstantPool(),
   2357                                false, false, false, 0);
   2358   SDValue Chain = Result.getValue(1);
   2359 
   2360   if (RelocM == Reloc::PIC_) {
   2361     SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2362     Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2363   }
   2364 
   2365   if (Subtarget->GVIsIndirectSymbol(GV, RelocM))
   2366     Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(),
   2367                          false, false, false, 0);
   2368 
   2369   return Result;
   2370 }
   2371 
   2372 SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
   2373                                                     SelectionDAG &DAG) const {
   2374   assert(Subtarget->isTargetELF() &&
   2375          "GLOBAL OFFSET TABLE not implemented for non-ELF targets");
   2376   MachineFunction &MF = DAG.getMachineFunction();
   2377   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2378   unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2379   EVT PtrVT = getPointerTy();
   2380   DebugLoc dl = Op.getDebugLoc();
   2381   unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
   2382   ARMConstantPoolValue *CPV =
   2383     ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_",
   2384                                   ARMPCLabelIndex, PCAdj);
   2385   SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2386   CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2387   SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2388                                MachinePointerInfo::getConstantPool(),
   2389                                false, false, false, 0);
   2390   SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2391   return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2392 }
   2393 
   2394 SDValue
   2395 ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
   2396   DebugLoc dl = Op.getDebugLoc();
   2397   SDValue Val = DAG.getConstant(0, MVT::i32);
   2398   return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
   2399                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
   2400                      Op.getOperand(1), Val);
   2401 }
   2402 
   2403 SDValue
   2404 ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
   2405   DebugLoc dl = Op.getDebugLoc();
   2406   return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
   2407                      Op.getOperand(1), DAG.getConstant(0, MVT::i32));
   2408 }
   2409 
   2410 SDValue
   2411 ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
   2412                                           const ARMSubtarget *Subtarget) const {
   2413   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   2414   DebugLoc dl = Op.getDebugLoc();
   2415   switch (IntNo) {
   2416   default: return SDValue();    // Don't custom lower most intrinsics.
   2417   case Intrinsic::arm_thread_pointer: {
   2418     EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2419     return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
   2420   }
   2421   case Intrinsic::eh_sjlj_lsda: {
   2422     MachineFunction &MF = DAG.getMachineFunction();
   2423     ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2424     unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
   2425     EVT PtrVT = getPointerTy();
   2426     Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   2427     SDValue CPAddr;
   2428     unsigned PCAdj = (RelocM != Reloc::PIC_)
   2429       ? 0 : (Subtarget->isThumb() ? 4 : 8);
   2430     ARMConstantPoolValue *CPV =
   2431       ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
   2432                                       ARMCP::CPLSDA, PCAdj);
   2433     CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
   2434     CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
   2435     SDValue Result =
   2436       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
   2437                   MachinePointerInfo::getConstantPool(),
   2438                   false, false, false, 0);
   2439 
   2440     if (RelocM == Reloc::PIC_) {
   2441       SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
   2442       Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
   2443     }
   2444     return Result;
   2445   }
   2446   case Intrinsic::arm_neon_vmulls:
   2447   case Intrinsic::arm_neon_vmullu: {
   2448     unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
   2449       ? ARMISD::VMULLs : ARMISD::VMULLu;
   2450     return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(),
   2451                        Op.getOperand(1), Op.getOperand(2));
   2452   }
   2453   }
   2454 }
   2455 
   2456 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
   2457                                const ARMSubtarget *Subtarget) {
   2458   DebugLoc dl = Op.getDebugLoc();
   2459   if (!Subtarget->hasDataBarrier()) {
   2460     // Some ARMv6 cpus can support data barriers with an mcr instruction.
   2461     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
   2462     // here.
   2463     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
   2464            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
   2465     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
   2466                        DAG.getConstant(0, MVT::i32));
   2467   }
   2468 
   2469   SDValue Op5 = Op.getOperand(5);
   2470   bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0;
   2471   unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   2472   unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
   2473   bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0);
   2474 
   2475   ARM_MB::MemBOpt DMBOpt;
   2476   if (isDeviceBarrier)
   2477     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY;
   2478   else
   2479     DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH;
   2480   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
   2481                      DAG.getConstant(DMBOpt, MVT::i32));
   2482 }
   2483 
   2484 
   2485 static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
   2486                                  const ARMSubtarget *Subtarget) {
   2487   // FIXME: handle "fence singlethread" more efficiently.
   2488   DebugLoc dl = Op.getDebugLoc();
   2489   if (!Subtarget->hasDataBarrier()) {
   2490     // Some ARMv6 cpus can support data barriers with an mcr instruction.
   2491     // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
   2492     // here.
   2493     assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
   2494            "Unexpected ISD::MEMBARRIER encountered. Should be libcall!");
   2495     return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
   2496                        DAG.getConstant(0, MVT::i32));
   2497   }
   2498 
   2499   return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
   2500                      DAG.getConstant(ARM_MB::ISH, MVT::i32));
   2501 }
   2502 
   2503 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
   2504                              const ARMSubtarget *Subtarget) {
    2505   // ARM pre-v5TE and Thumb1 do not have preload instructions.
   2506   if (!(Subtarget->isThumb2() ||
   2507         (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
   2508     // Just preserve the chain.
   2509     return Op.getOperand(0);
   2510 
   2511   DebugLoc dl = Op.getDebugLoc();
   2512   unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
   2513   if (!isRead &&
   2514       (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
   2515     // ARMv7 with MP extension has PLDW.
   2516     return Op.getOperand(0);
   2517 
   2518   unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
   2519   if (Subtarget->isThumb()) {
   2520     // Invert the bits.
   2521     isRead = ~isRead & 1;
   2522     isData = ~isData & 1;
   2523   }
   2524 
   2525   return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
   2526                      Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
   2527                      DAG.getConstant(isData, MVT::i32));
   2528 }
   2529 
   2530 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
   2531   MachineFunction &MF = DAG.getMachineFunction();
   2532   ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
   2533 
   2534   // vastart just stores the address of the VarArgsFrameIndex slot into the
   2535   // memory location argument.
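           // For example, for "va_start(ap, last)" this simply stores the frame-index
           // address (set up by VarArgStyleRegisters) through the incoming pointer ap.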
   2536   DebugLoc dl = Op.getDebugLoc();
   2537   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   2538   SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
   2539   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   2540   return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
   2541                       MachinePointerInfo(SV), false, false, 0);
   2542 }
   2543 
   2544 SDValue
   2545 ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
   2546                                         SDValue &Root, SelectionDAG &DAG,
   2547                                         DebugLoc dl) const {
   2548   MachineFunction &MF = DAG.getMachineFunction();
   2549   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2550 
   2551   const TargetRegisterClass *RC;
   2552   if (AFI->isThumb1OnlyFunction())
   2553     RC = &ARM::tGPRRegClass;
   2554   else
   2555     RC = &ARM::GPRRegClass;
   2556 
   2557   // Transform the arguments stored in physical registers into virtual ones.
   2558   unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2559   SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2560 
   2561   SDValue ArgValue2;
   2562   if (NextVA.isMemLoc()) {
   2563     MachineFrameInfo *MFI = MF.getFrameInfo();
   2564     int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
   2565 
   2566     // Create load node to retrieve arguments from the stack.
   2567     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2568     ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN,
   2569                             MachinePointerInfo::getFixedStack(FI),
   2570                             false, false, false, 0);
   2571   } else {
   2572     Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
   2573     ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
   2574   }
   2575 
   2576   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
   2577 }
   2578 
   2579 void
   2580 ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF,
   2581                                   unsigned &VARegSize, unsigned &VARegSaveSize)
   2582   const {
   2583   unsigned NumGPRs;
   2584   if (CCInfo.isFirstByValRegValid())
   2585     NumGPRs = ARM::R4 - CCInfo.getFirstByValReg();
   2586   else {
   2587     unsigned int firstUnalloced;
   2588     firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs,
   2589                                                 sizeof(GPRArgRegs) /
   2590                                                 sizeof(GPRArgRegs[0]));
   2591     NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0;
   2592   }
   2593 
   2594   unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment();
   2595   VARegSize = NumGPRs * 4;
   2596   VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
   2597 }
   2598 
   2599 // The remaining GPRs hold either the beginning of variable-argument
   2600 // data, or the beginning of an aggregate passed by value (usually
   2601 // byval).  Either way, we allocate stack slots adjacent to the data
   2602 // provided by our caller, and store the unallocated registers there.
   2603 // If this is a variadic function, the va_list pointer will begin with
   2604 // these values; otherwise, this reassembles a (byval) structure that
   2605 // was split between registers and memory.
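         // As an illustration (assuming AAPCS and a variadic "int f(int a, ...)"):
         // "a" arrives in r0, so r1-r3 are the remaining GPRs spilled here, giving
         // va_arg a contiguous view of r1-r3 followed by the caller's stack
         // arguments.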
   2606 void
   2607 ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
   2608                                         DebugLoc dl, SDValue &Chain,
   2609                                         const Value *OrigArg,
   2610                                         unsigned OffsetFromOrigArg,
   2611                                         unsigned ArgOffset,
   2612                                         bool ForceMutable) const {
   2613   MachineFunction &MF = DAG.getMachineFunction();
   2614   MachineFrameInfo *MFI = MF.getFrameInfo();
   2615   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2616   unsigned firstRegToSaveIndex;
   2617   if (CCInfo.isFirstByValRegValid())
   2618     firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0;
   2619   else {
   2620     firstRegToSaveIndex = CCInfo.getFirstUnallocated
   2621       (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));
   2622   }
   2623 
   2624   unsigned VARegSize, VARegSaveSize;
   2625   computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize);
   2626   if (VARegSaveSize) {
   2627     // If this function is vararg, store any remaining integer argument regs
    2628     // to their spots on the stack so that they may be loaded by dereferencing
    2629     // the va_list pointer.
   2630     AFI->setVarArgsRegSaveSize(VARegSaveSize);
   2631     AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize,
   2632                                                      ArgOffset + VARegSaveSize
   2633                                                      - VARegSize,
   2634                                                      false));
   2635     SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
   2636                                     getPointerTy());
   2637 
   2638     SmallVector<SDValue, 4> MemOps;
   2639     for (unsigned i = 0; firstRegToSaveIndex < 4; ++firstRegToSaveIndex, ++i) {
   2640       const TargetRegisterClass *RC;
   2641       if (AFI->isThumb1OnlyFunction())
   2642         RC = &ARM::tGPRRegClass;
   2643       else
   2644         RC = &ARM::GPRRegClass;
   2645 
   2646       unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC);
   2647       SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
   2648       SDValue Store =
   2649         DAG.getStore(Val.getValue(1), dl, Val, FIN,
   2650                      MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i),
   2651                      false, false, 0);
   2652       MemOps.push_back(Store);
   2653       FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
   2654                         DAG.getConstant(4, getPointerTy()));
   2655     }
   2656     if (!MemOps.empty())
   2657       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   2658                           &MemOps[0], MemOps.size());
   2659   } else
   2660     // This will point to the next argument passed via stack.
   2661     AFI->setVarArgsFrameIndex(
   2662         MFI->CreateFixedObject(4, ArgOffset, !ForceMutable));
   2663 }
   2664 
   2665 SDValue
   2666 ARMTargetLowering::LowerFormalArguments(SDValue Chain,
   2667                                         CallingConv::ID CallConv, bool isVarArg,
   2668                                         const SmallVectorImpl<ISD::InputArg>
   2669                                           &Ins,
   2670                                         DebugLoc dl, SelectionDAG &DAG,
   2671                                         SmallVectorImpl<SDValue> &InVals)
   2672                                           const {
   2673   MachineFunction &MF = DAG.getMachineFunction();
   2674   MachineFrameInfo *MFI = MF.getFrameInfo();
   2675 
   2676   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2677 
   2678   // Assign locations to all of the incoming arguments.
   2679   SmallVector<CCValAssign, 16> ArgLocs;
   2680   ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   2681                     getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue);
   2682   CCInfo.AnalyzeFormalArguments(Ins,
   2683                                 CCAssignFnForNode(CallConv, /* Return*/ false,
   2684                                                   isVarArg));
   2685 
   2686   SmallVector<SDValue, 16> ArgValues;
   2687   int lastInsIndex = -1;
   2688   SDValue ArgValue;
   2689   Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
   2690   unsigned CurArgIdx = 0;
   2691   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   2692     CCValAssign &VA = ArgLocs[i];
   2693     std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx);
   2694     CurArgIdx = Ins[VA.getValNo()].OrigArgIndex;
   2695     // Arguments stored in registers.
   2696     if (VA.isRegLoc()) {
   2697       EVT RegVT = VA.getLocVT();
   2698 
   2699       if (VA.needsCustom()) {
   2700         // f64 and vector types are split up into multiple registers or
   2701         // combinations of registers and stack slots.
   2702         if (VA.getLocVT() == MVT::v2f64) {
   2703           SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
   2704                                                    Chain, DAG, dl);
   2705           VA = ArgLocs[++i]; // skip ahead to next loc
   2706           SDValue ArgValue2;
   2707           if (VA.isMemLoc()) {
   2708             int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
   2709             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2710             ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
   2711                                     MachinePointerInfo::getFixedStack(FI),
   2712                                     false, false, false, 0);
   2713           } else {
   2714             ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
   2715                                              Chain, DAG, dl);
   2716           }
   2717           ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
   2718           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2719                                  ArgValue, ArgValue1, DAG.getIntPtrConstant(0));
   2720           ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
   2721                                  ArgValue, ArgValue2, DAG.getIntPtrConstant(1));
   2722         } else
   2723           ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
   2724 
   2725       } else {
   2726         const TargetRegisterClass *RC;
   2727 
   2728         if (RegVT == MVT::f32)
   2729           RC = &ARM::SPRRegClass;
   2730         else if (RegVT == MVT::f64)
   2731           RC = &ARM::DPRRegClass;
   2732         else if (RegVT == MVT::v2f64)
   2733           RC = &ARM::QPRRegClass;
   2734         else if (RegVT == MVT::i32)
   2735           RC = AFI->isThumb1OnlyFunction() ?
   2736             (const TargetRegisterClass*)&ARM::tGPRRegClass :
   2737             (const TargetRegisterClass*)&ARM::GPRRegClass;
   2738         else
   2739           llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
   2740 
   2741         // Transform the arguments in physical registers into virtual ones.
   2742         unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2743         ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
   2744       }
   2745 
   2746       // If this is an 8 or 16-bit value, it is really passed promoted
   2747       // to 32 bits.  Insert an assert[sz]ext to capture this, then
   2748       // truncate to the right size.
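               // For example, an i8 argument shows up here as an i32 live-in; the
               // AssertZext/AssertSext node records the promotion and the TRUNCATE
               // below recovers the declared i8 value.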
   2749       switch (VA.getLocInfo()) {
   2750       default: llvm_unreachable("Unknown loc info!");
   2751       case CCValAssign::Full: break;
   2752       case CCValAssign::BCvt:
   2753         ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
   2754         break;
   2755       case CCValAssign::SExt:
   2756         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
   2757                                DAG.getValueType(VA.getValVT()));
   2758         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2759         break;
   2760       case CCValAssign::ZExt:
   2761         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
   2762                                DAG.getValueType(VA.getValVT()));
   2763         ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2764         break;
   2765       }
   2766 
   2767       InVals.push_back(ArgValue);
   2768 
   2769     } else { // VA.isRegLoc()
   2770 
   2771       // sanity check
   2772       assert(VA.isMemLoc());
   2773       assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
   2774 
   2775       int index = ArgLocs[i].getValNo();
   2776 
   2777       // Some Ins[] entries become multiple ArgLoc[] entries.
   2778       // Process them only once.
   2779       if (index != lastInsIndex)
   2780         {
   2781           ISD::ArgFlagsTy Flags = Ins[index].Flags;
   2782           // FIXME: For now, all byval parameter objects are marked mutable.
   2783           // This can be changed with more analysis.
   2784           // In case of tail call optimization mark all arguments mutable.
   2785           // Since they could be overwritten by lowering of arguments in case of
   2786           // a tail call.
   2787           if (Flags.isByVal()) {
   2788             ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   2789             if (!AFI->getVarArgsFrameIndex()) {
   2790               VarArgStyleRegisters(CCInfo, DAG,
   2791                                    dl, Chain, CurOrigArg,
   2792                                    Ins[VA.getValNo()].PartOffset,
   2793                                    VA.getLocMemOffset(),
   2794                                    true /*force mutable frames*/);
   2795               int VAFrameIndex = AFI->getVarArgsFrameIndex();
   2796               InVals.push_back(DAG.getFrameIndex(VAFrameIndex, getPointerTy()));
   2797             } else {
   2798               int FI = MFI->CreateFixedObject(Flags.getByValSize(),
   2799                                               VA.getLocMemOffset(), false);
   2800               InVals.push_back(DAG.getFrameIndex(FI, getPointerTy()));
   2801             }
   2802           } else {
   2803             int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
   2804                                             VA.getLocMemOffset(), true);
   2805 
   2806             // Create load nodes to retrieve arguments from the stack.
   2807             SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
   2808             InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
   2809                                          MachinePointerInfo::getFixedStack(FI),
   2810                                          false, false, false, 0));
   2811           }
   2812           lastInsIndex = index;
   2813         }
   2814     }
   2815   }
   2816 
   2817   // varargs
   2818   if (isVarArg)
   2819     VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0, 0,
   2820                          CCInfo.getNextStackOffset());
   2821 
   2822   return Chain;
   2823 }
   2824 
   2825 /// isFloatingPointZero - Return true if this is +0.0.
   2826 static bool isFloatingPointZero(SDValue Op) {
   2827   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
   2828     return CFP->getValueAPF().isPosZero();
   2829   else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
   2830     // Maybe this has already been legalized into the constant pool?
   2831     if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
   2832       SDValue WrapperOp = Op.getOperand(1).getOperand(0);
   2833       if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
   2834         if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
   2835           return CFP->getValueAPF().isPosZero();
   2836     }
   2837   }
   2838   return false;
   2839 }
   2840 
   2841 /// Returns appropriate ARM CMP (cmp) and corresponding condition code for
   2842 /// the given operands.
   2843 SDValue
   2844 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   2845                              SDValue &ARMcc, SelectionDAG &DAG,
   2846                              DebugLoc dl) const {
   2847   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
   2848     unsigned C = RHSC->getZExtValue();
   2849     if (!isLegalICmpImmediate(C)) {
   2850       // Constant does not fit, try adjusting it by one?
   2851       switch (CC) {
   2852       default: break;
   2853       case ISD::SETLT:
   2854       case ISD::SETGE:
   2855         if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
   2856           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
   2857           RHS = DAG.getConstant(C-1, MVT::i32);
   2858         }
   2859         break;
   2860       case ISD::SETULT:
   2861       case ISD::SETUGE:
   2862         if (C != 0 && isLegalICmpImmediate(C-1)) {
   2863           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
   2864           RHS = DAG.getConstant(C-1, MVT::i32);
   2865         }
   2866         break;
   2867       case ISD::SETLE:
   2868       case ISD::SETGT:
   2869         if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
   2870           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
   2871           RHS = DAG.getConstant(C+1, MVT::i32);
   2872         }
   2873         break;
   2874       case ISD::SETULE:
   2875       case ISD::SETUGT:
   2876         if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
   2877           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
   2878           RHS = DAG.getConstant(C+1, MVT::i32);
   2879         }
   2880         break;
   2881       }
   2882     }
   2883   }
   2884 
   2885   ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   2886   ARMISD::NodeType CompareType;
   2887   switch (CondCode) {
   2888   default:
   2889     CompareType = ARMISD::CMP;
   2890     break;
   2891   case ARMCC::EQ:
   2892   case ARMCC::NE:
   2893     // Uses only Z Flag
   2894     CompareType = ARMISD::CMPZ;
   2895     break;
   2896   }
   2897   ARMcc = DAG.getConstant(CondCode, MVT::i32);
   2898   return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
   2899 }
   2900 
    2901 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
   2902 SDValue
   2903 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
   2904                              DebugLoc dl) const {
   2905   SDValue Cmp;
   2906   if (!isFloatingPointZero(RHS))
   2907     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS);
   2908   else
   2909     Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS);
   2910   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
   2911 }
   2912 
   2913 /// duplicateCmp - Glue values can have only one use, so this function
   2914 /// duplicates a comparison node.
   2915 SDValue
   2916 ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
   2917   unsigned Opc = Cmp.getOpcode();
   2918   DebugLoc DL = Cmp.getDebugLoc();
   2919   if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
   2920     return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   2921 
   2922   assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
   2923   Cmp = Cmp.getOperand(0);
   2924   Opc = Cmp.getOpcode();
   2925   if (Opc == ARMISD::CMPFP)
   2926     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
   2927   else {
   2928     assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
   2929     Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0));
   2930   }
   2931   return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
   2932 }
   2933 
   2934 SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   2935   SDValue Cond = Op.getOperand(0);
   2936   SDValue SelectTrue = Op.getOperand(1);
   2937   SDValue SelectFalse = Op.getOperand(2);
   2938   DebugLoc dl = Op.getDebugLoc();
   2939 
   2940   // Convert:
   2941   //
   2942   //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
   2943   //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
   2944   //
   2945   if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
   2946     const ConstantSDNode *CMOVTrue =
   2947       dyn_cast<ConstantSDNode>(Cond.getOperand(0));
   2948     const ConstantSDNode *CMOVFalse =
   2949       dyn_cast<ConstantSDNode>(Cond.getOperand(1));
   2950 
   2951     if (CMOVTrue && CMOVFalse) {
   2952       unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
   2953       unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
   2954 
   2955       SDValue True;
   2956       SDValue False;
   2957       if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
   2958         True = SelectTrue;
   2959         False = SelectFalse;
   2960       } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
   2961         True = SelectFalse;
   2962         False = SelectTrue;
   2963       }
   2964 
   2965       if (True.getNode() && False.getNode()) {
   2966         EVT VT = Op.getValueType();
   2967         SDValue ARMcc = Cond.getOperand(2);
   2968         SDValue CCR = Cond.getOperand(3);
   2969         SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
   2970         assert(True.getValueType() == VT);
   2971         return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
   2972       }
   2973     }
   2974   }
   2975 
   2976   // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
   2977   // undefined bits before doing a full-word comparison with zero.
   2978   Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
   2979                      DAG.getConstant(1, Cond.getValueType()));
   2980 
   2981   return DAG.getSelectCC(dl, Cond,
   2982                          DAG.getConstant(0, Cond.getValueType()),
   2983                          SelectTrue, SelectFalse, ISD::SETNE);
   2984 }
   2985 
   2986 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   2987   EVT VT = Op.getValueType();
   2988   SDValue LHS = Op.getOperand(0);
   2989   SDValue RHS = Op.getOperand(1);
   2990   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   2991   SDValue TrueVal = Op.getOperand(2);
   2992   SDValue FalseVal = Op.getOperand(3);
   2993   DebugLoc dl = Op.getDebugLoc();
   2994 
   2995   if (LHS.getValueType() == MVT::i32) {
   2996     SDValue ARMcc;
   2997     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   2998     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   2999     return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
   3000   }
   3001 
   3002   ARMCC::CondCodes CondCode, CondCode2;
   3003   FPCCToARMCC(CC, CondCode, CondCode2);
   3004 
   3005   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3006   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   3007   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3008   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
   3009                                ARMcc, CCR, Cmp);
   3010   if (CondCode2 != ARMCC::AL) {
   3011     SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
   3012     // FIXME: Needs another CMP because flag can have but one use.
   3013     SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
   3014     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
   3015                          Result, TrueVal, ARMcc2, CCR, Cmp2);
   3016   }
   3017   return Result;
   3018 }
   3019 
   3020 /// canChangeToInt - Given the fp compare operand, return true if it is suitable
   3021 /// to morph to an integer compare sequence.
   3022 static bool canChangeToInt(SDValue Op, bool &SeenZero,
   3023                            const ARMSubtarget *Subtarget) {
   3024   SDNode *N = Op.getNode();
   3025   if (!N->hasOneUse())
   3026     // Otherwise it requires moving the value from fp to integer registers.
   3027     return false;
   3028   if (!N->getNumValues())
   3029     return false;
   3030   EVT VT = Op.getValueType();
   3031   if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
   3032     // f32 case is generally profitable. f64 case only makes sense when vcmpe +
   3033     // vmrs are very slow, e.g. cortex-a8.
   3034     return false;
   3035 
   3036   if (isFloatingPointZero(Op)) {
   3037     SeenZero = true;
   3038     return true;
   3039   }
   3040   return ISD::isNormalLoad(N);
   3041 }
   3042 
   3043 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
   3044   if (isFloatingPointZero(Op))
   3045     return DAG.getConstant(0, MVT::i32);
   3046 
   3047   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
   3048     return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   3049                        Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(),
   3050                        Ld->isVolatile(), Ld->isNonTemporal(),
   3051                        Ld->isInvariant(), Ld->getAlignment());
   3052 
   3053   llvm_unreachable("Unknown VFP cmp argument!");
   3054 }
   3055 
   3056 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
   3057                            SDValue &RetVal1, SDValue &RetVal2) {
   3058   if (isFloatingPointZero(Op)) {
   3059     RetVal1 = DAG.getConstant(0, MVT::i32);
   3060     RetVal2 = DAG.getConstant(0, MVT::i32);
   3061     return;
   3062   }
   3063 
   3064   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
   3065     SDValue Ptr = Ld->getBasePtr();
   3066     RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   3067                           Ld->getChain(), Ptr,
   3068                           Ld->getPointerInfo(),
   3069                           Ld->isVolatile(), Ld->isNonTemporal(),
   3070                           Ld->isInvariant(), Ld->getAlignment());
   3071 
   3072     EVT PtrType = Ptr.getValueType();
   3073     unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
   3074     SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
   3075                                  PtrType, Ptr, DAG.getConstant(4, PtrType));
   3076     RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
   3077                           Ld->getChain(), NewPtr,
   3078                           Ld->getPointerInfo().getWithOffset(4),
   3079                           Ld->isVolatile(), Ld->isNonTemporal(),
   3080                           Ld->isInvariant(), NewAlign);
   3081     return;
   3082   }
   3083 
   3084   llvm_unreachable("Unknown VFP cmp argument!");
   3085 }
   3086 
   3087 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
   3088 /// f32 and even f64 comparisons to integer ones.
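         /// As a sketch, an f32 equality test where one side is +0.0 can become an
         /// integer compare of the operands' bit patterns with the sign bit masked
         /// off, avoiding the VCMP + FMSTAT sequence.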
   3089 SDValue
   3090 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
   3091   SDValue Chain = Op.getOperand(0);
   3092   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   3093   SDValue LHS = Op.getOperand(2);
   3094   SDValue RHS = Op.getOperand(3);
   3095   SDValue Dest = Op.getOperand(4);
   3096   DebugLoc dl = Op.getDebugLoc();
   3097 
   3098   bool LHSSeenZero = false;
   3099   bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
   3100   bool RHSSeenZero = false;
   3101   bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
   3102   if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
   3103     // If unsafe fp math optimization is enabled and there are no other uses of
   3104     // the CMP operands, and the condition code is EQ or NE, we can optimize it
   3105     // to an integer comparison.
   3106     if (CC == ISD::SETOEQ)
   3107       CC = ISD::SETEQ;
   3108     else if (CC == ISD::SETUNE)
   3109       CC = ISD::SETNE;
   3110 
   3111     SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32);
   3112     SDValue ARMcc;
   3113     if (LHS.getValueType() == MVT::f32) {
   3114       LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
   3115                         bitcastf32Toi32(LHS, DAG), Mask);
   3116       RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
   3117                         bitcastf32Toi32(RHS, DAG), Mask);
   3118       SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   3119       SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3120       return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   3121                          Chain, Dest, ARMcc, CCR, Cmp);
   3122     }
   3123 
   3124     SDValue LHS1, LHS2;
   3125     SDValue RHS1, RHS2;
   3126     expandf64Toi32(LHS, DAG, LHS1, LHS2);
   3127     expandf64Toi32(RHS, DAG, RHS1, RHS2);
   3128     LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
   3129     RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
   3130     ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
   3131     ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3132     SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   3133     SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
   3134     return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
   3135   }
   3136 
   3137   return SDValue();
   3138 }
   3139 
   3140 SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   3141   SDValue Chain = Op.getOperand(0);
   3142   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   3143   SDValue LHS = Op.getOperand(2);
   3144   SDValue RHS = Op.getOperand(3);
   3145   SDValue Dest = Op.getOperand(4);
   3146   DebugLoc dl = Op.getDebugLoc();
   3147 
   3148   if (LHS.getValueType() == MVT::i32) {
   3149     SDValue ARMcc;
   3150     SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
   3151     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3152     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
   3153                        Chain, Dest, ARMcc, CCR, Cmp);
   3154   }
   3155 
   3156   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
   3157 
   3158   if (getTargetMachine().Options.UnsafeFPMath &&
   3159       (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
   3160        CC == ISD::SETNE || CC == ISD::SETUNE)) {
   3161     SDValue Result = OptimizeVFPBrcond(Op, DAG);
   3162     if (Result.getNode())
   3163       return Result;
   3164   }
   3165 
   3166   ARMCC::CondCodes CondCode, CondCode2;
   3167   FPCCToARMCC(CC, CondCode, CondCode2);
   3168 
   3169   SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
   3170   SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   3171   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3172   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
   3173   SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
   3174   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   3175   if (CondCode2 != ARMCC::AL) {
   3176     ARMcc = DAG.getConstant(CondCode2, MVT::i32);
   3177     SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
   3178     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   3179   }
   3180   return Res;
   3181 }
   3182 
   3183 SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
   3184   SDValue Chain = Op.getOperand(0);
   3185   SDValue Table = Op.getOperand(1);
   3186   SDValue Index = Op.getOperand(2);
   3187   DebugLoc dl = Op.getDebugLoc();
   3188 
   3189   EVT PTy = getPointerTy();
   3190   JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
   3191   ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
   3192   SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy);
   3193   SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
   3194   Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId);
   3195   Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy));
   3196   SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
   3197   if (Subtarget->isThumb2()) {
   3198     // Thumb2 uses a two-level jump. That is, it jumps into the jump table
   3199     // which does another jump to the destination. This also makes it easier
   3200     // to translate it to TBB / TBH later.
   3201     // FIXME: This might not work if the function is extremely large.
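             // Roughly, later passes can turn this into, e.g.:
             //     tbb   [pc, rIndex]   @ branch through an inline table of byte offsets
             // (the table entries are stored in halfword units).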
   3202     return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
   3203                        Addr, Op.getOperand(2), JTI, UId);
   3204   }
   3205   if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
   3206     Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
   3207                        MachinePointerInfo::getJumpTable(),
   3208                        false, false, false, 0);
   3209     Chain = Addr.getValue(1);
   3210     Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
   3211     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   3212   } else {
   3213     Addr = DAG.getLoad(PTy, dl, Chain, Addr,
   3214                        MachinePointerInfo::getJumpTable(),
   3215                        false, false, false, 0);
   3216     Chain = Addr.getValue(1);
   3217     return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId);
   3218   }
   3219 }
   3220 
   3221 static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   3222   EVT VT = Op.getValueType();
   3223   DebugLoc dl = Op.getDebugLoc();
   3224 
   3225   if (Op.getValueType().getVectorElementType() == MVT::i32) {
   3226     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
   3227       return Op;
   3228     return DAG.UnrollVectorOp(Op.getNode());
   3229   }
   3230 
   3231   assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
   3232          "Invalid type for custom lowering!");
   3233   if (VT != MVT::v4i16)
   3234     return DAG.UnrollVectorOp(Op.getNode());
   3235 
   3236   Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
   3237   return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
   3238 }
   3239 
   3240 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
   3241   EVT VT = Op.getValueType();
   3242   if (VT.isVector())
   3243     return LowerVectorFP_TO_INT(Op, DAG);
   3244 
   3245   DebugLoc dl = Op.getDebugLoc();
   3246   unsigned Opc;
   3247 
   3248   switch (Op.getOpcode()) {
   3249   default: llvm_unreachable("Invalid opcode!");
   3250   case ISD::FP_TO_SINT:
   3251     Opc = ARMISD::FTOSI;
   3252     break;
   3253   case ISD::FP_TO_UINT:
   3254     Opc = ARMISD::FTOUI;
   3255     break;
   3256   }
   3257   Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0));
   3258   return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
   3259 }
   3260 
   3261 static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   3262   EVT VT = Op.getValueType();
   3263   DebugLoc dl = Op.getDebugLoc();
   3264 
   3265   if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
   3266     if (VT.getVectorElementType() == MVT::f32)
   3267       return Op;
   3268     return DAG.UnrollVectorOp(Op.getNode());
   3269   }
   3270 
   3271   assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
   3272          "Invalid type for custom lowering!");
   3273   if (VT != MVT::v4f32)
   3274     return DAG.UnrollVectorOp(Op.getNode());
   3275 
   3276   unsigned CastOpc;
   3277   unsigned Opc;
   3278   switch (Op.getOpcode()) {
   3279   default: llvm_unreachable("Invalid opcode!");
   3280   case ISD::SINT_TO_FP:
   3281     CastOpc = ISD::SIGN_EXTEND;
   3282     Opc = ISD::SINT_TO_FP;
   3283     break;
   3284   case ISD::UINT_TO_FP:
   3285     CastOpc = ISD::ZERO_EXTEND;
   3286     Opc = ISD::UINT_TO_FP;
   3287     break;
   3288   }
   3289 
   3290   Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
   3291   return DAG.getNode(Opc, dl, VT, Op);
   3292 }
   3293 
   3294 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
   3295   EVT VT = Op.getValueType();
   3296   if (VT.isVector())
   3297     return LowerVectorINT_TO_FP(Op, DAG);
   3298 
   3299   DebugLoc dl = Op.getDebugLoc();
   3300   unsigned Opc;
   3301 
   3302   switch (Op.getOpcode()) {
   3303   default: llvm_unreachable("Invalid opcode!");
   3304   case ISD::SINT_TO_FP:
   3305     Opc = ARMISD::SITOF;
   3306     break;
   3307   case ISD::UINT_TO_FP:
   3308     Opc = ARMISD::UITOF;
   3309     break;
   3310   }
   3311 
   3312   Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0));
   3313   return DAG.getNode(Opc, dl, VT, Op);
   3314 }
   3315 
   3316 SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
    3317   // Implement fcopysign by copying the sign bit of Tmp1 into Tmp0.
   3318   SDValue Tmp0 = Op.getOperand(0);
   3319   SDValue Tmp1 = Op.getOperand(1);
   3320   DebugLoc dl = Op.getDebugLoc();
   3321   EVT VT = Op.getValueType();
   3322   EVT SrcVT = Tmp1.getValueType();
   3323   bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
   3324     Tmp0.getOpcode() == ARMISD::VMOVDRR;
   3325   bool UseNEON = !InGPR && Subtarget->hasNEON();
   3326 
   3327   if (UseNEON) {
   3328     // Use VBSL to copy the sign bit.
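             // Schematically: Res = (Tmp1 & SignMask) | (Tmp0 & ~SignMask), built as
             // OR/AND/AND nodes below that the NEON selector can fold into a VBSL.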
   3329     unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
   3330     SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
   3331                                DAG.getTargetConstant(EncodedVal, MVT::i32));
   3332     EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
   3333     if (VT == MVT::f64)
   3334       Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3335                          DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
   3336                          DAG.getConstant(32, MVT::i32));
   3337     else /*if (VT == MVT::f32)*/
   3338       Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
   3339     if (SrcVT == MVT::f32) {
   3340       Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
   3341       if (VT == MVT::f64)
   3342         Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
   3343                            DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
   3344                            DAG.getConstant(32, MVT::i32));
   3345     } else if (VT == MVT::f32)
   3346       Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
   3347                          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
   3348                          DAG.getConstant(32, MVT::i32));
   3349     Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
   3350     Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
   3351 
   3352     SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
   3353                                             MVT::i32);
   3354     AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
   3355     SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
   3356                                   DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
   3357 
   3358     SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
   3359                               DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
   3360                               DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
   3361     if (VT == MVT::f32) {
   3362       Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
   3363       Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
   3364                         DAG.getConstant(0, MVT::i32));
   3365     } else {
   3366       Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
   3367     }
   3368 
   3369     return Res;
   3370   }
   3371 
   3372   // Bitcast operand 1 to i32.
   3373   if (SrcVT == MVT::f64)
   3374     Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3375                        &Tmp1, 1).getValue(1);
   3376   Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
   3377 
   3378   // Or in the signbit with integer operations.
   3379   SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32);
   3380   SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32);
   3381   Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
   3382   if (VT == MVT::f32) {
   3383     Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
   3384                        DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
   3385     return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
   3386                        DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
   3387   }
   3388 
   3389   // f64: Or the high part with signbit and then combine two parts.
   3390   Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
   3391                      &Tmp0, 1);
   3392   SDValue Lo = Tmp0.getValue(0);
   3393   SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
   3394   Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
   3395   return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
   3396 }
   3397 
   3398 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
   3399   MachineFunction &MF = DAG.getMachineFunction();
   3400   MachineFrameInfo *MFI = MF.getFrameInfo();
   3401   MFI->setReturnAddressIsTaken(true);
   3402 
   3403   EVT VT = Op.getValueType();
   3404   DebugLoc dl = Op.getDebugLoc();
   3405   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3406   if (Depth) {
   3407     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
   3408     SDValue Offset = DAG.getConstant(4, MVT::i32);
   3409     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
   3410                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
   3411                        MachinePointerInfo(), false, false, false, 0);
   3412   }
   3413 
   3414   // Return LR, which contains the return address. Mark it an implicit live-in.
   3415   unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
   3416   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   3417 }
   3418 
   3419 SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   3420   MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   3421   MFI->setFrameAddressIsTaken(true);
   3422 
   3423   EVT VT = Op.getValueType();
   3424   DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
   3425   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   3426   unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
   3427     ? ARM::R7 : ARM::R11;
   3428   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
   3429   while (Depth--)
   3430     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
   3431                             MachinePointerInfo(),
   3432                             false, false, false, 0);
   3433   return FrameAddr;
   3434 }
   3435 
   3436 /// ExpandBITCAST - If the target supports VFP, this function is called to
   3437 /// expand a bit convert where either the source or destination type is i64 to
   3438 /// use a VMOVDRR or VMOVRRD node.  This should not be done when the non-i64
   3439 /// operand type is illegal (e.g., v2f32 for a target that doesn't support
   3440 /// vectors), since the legalizer won't know what to do with that.
   3441 static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
   3442   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   3443   DebugLoc dl = N->getDebugLoc();
   3444   SDValue Op = N->getOperand(0);
   3445 
   3446   // This function is only supposed to be called for i64 types, either as the
   3447   // source or destination of the bit convert.
   3448   EVT SrcVT = Op.getValueType();
   3449   EVT DstVT = N->getValueType(0);
   3450   assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
   3451          "ExpandBITCAST called for non-i64 type");
   3452 
   3453   // Turn i64->f64 into VMOVDRR.
   3454   if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
   3455     SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3456                              DAG.getConstant(0, MVT::i32));
   3457     SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
   3458                              DAG.getConstant(1, MVT::i32));
   3459     return DAG.getNode(ISD::BITCAST, dl, DstVT,
   3460                        DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
   3461   }
   3462 
   3463   // Turn f64->i64 into VMOVRRD.
   3464   if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
   3465     SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
   3466                               DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
   3467     // Merge the pieces into a single i64 value.
   3468     return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
   3469   }
   3470 
   3471   return SDValue();
   3472 }
   3473 
   3474 /// getZeroVector - Returns a vector of specified type with all zero elements.
   3475 /// Zero vectors are used to represent vector negation and in those cases
   3476 /// will be implemented with the NEON VNEG instruction.  However, VNEG does
   3477 /// not support i64 elements, so sometimes the zero vectors will need to be
   3478 /// explicitly constructed.  Regardless, use a canonical VMOV to create the
   3479 /// zero vector.
   3480 static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   3481   assert(VT.isVector() && "Expected a vector type");
   3482   // The canonical modified immediate encoding of a zero vector is....0!
   3483   SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
   3484   EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
   3485   SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
   3486   return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   3487 }
   3488 
   3489 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
    3490 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
   3491 SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   3492                                                 SelectionDAG &DAG) const {
   3493   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3494   EVT VT = Op.getValueType();
   3495   unsigned VTBits = VT.getSizeInBits();
   3496   DebugLoc dl = Op.getDebugLoc();
   3497   SDValue ShOpLo = Op.getOperand(0);
   3498   SDValue ShOpHi = Op.getOperand(1);
   3499   SDValue ShAmt  = Op.getOperand(2);
   3500   SDValue ARMcc;
   3501   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
   3502 
   3503   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
   3504 
   3505   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3506                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3507   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
   3508   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3509                                    DAG.getConstant(VTBits, MVT::i32));
   3510   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
   3511   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3512   SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
   3513 
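           // For example, with VTBits == 32 and ShAmt == 4, ExtraShAmt is negative, so
           // the CMOV below picks FalseVal: (ShOpLo >> 4) | (ShOpHi << 28).  With
           // ShAmt == 40, ExtraShAmt == 8 is non-negative, and the low result becomes
           // ShOpHi >> 8 (arithmetic or logical depending on Opc).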
   3514   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3515   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3516                           ARMcc, DAG, dl);
   3517   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
   3518   SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
   3519                            CCR, Cmp);
   3520 
   3521   SDValue Ops[2] = { Lo, Hi };
   3522   return DAG.getMergeValues(Ops, 2, dl);
   3523 }
   3524 
   3525 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
    3526 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
   3527 SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   3528                                                SelectionDAG &DAG) const {
   3529   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
   3530   EVT VT = Op.getValueType();
   3531   unsigned VTBits = VT.getSizeInBits();
   3532   DebugLoc dl = Op.getDebugLoc();
   3533   SDValue ShOpLo = Op.getOperand(0);
   3534   SDValue ShOpHi = Op.getOperand(1);
   3535   SDValue ShAmt  = Op.getOperand(2);
   3536   SDValue ARMcc;
   3537 
   3538   assert(Op.getOpcode() == ISD::SHL_PARTS);
   3539   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
   3540                                  DAG.getConstant(VTBits, MVT::i32), ShAmt);
   3541   SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
   3542   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
   3543                                    DAG.getConstant(VTBits, MVT::i32));
   3544   SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
   3545   SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
   3546 
   3547   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   3548   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   3549   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
   3550                           ARMcc, DAG, dl);
   3551   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
   3552   SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
   3553                            CCR, Cmp);
   3554 
   3555   SDValue Ops[2] = { Lo, Hi };
   3556   return DAG.getMergeValues(Ops, 2, dl);
   3557 }
   3558 
   3559 SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
   3560                                             SelectionDAG &DAG) const {
   3561   // The rounding mode is in bits 23:22 of the FPSCR.
    3562   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
    3563   // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
    3564   // so that the shift and the AND get folded into a bitfield extract.
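           // For example, ARM rounding mode 2 (round toward minus infinity) has
           // FPSCR[23:22] == 0b10, and ((2 + 1) & 3) == 3, the FLT_ROUNDS value for
           // round-toward-negative-infinity.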
   3565   DebugLoc dl = Op.getDebugLoc();
   3566   SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
   3567                               DAG.getConstant(Intrinsic::arm_get_fpscr,
   3568                                               MVT::i32));
   3569   SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
   3570                                   DAG.getConstant(1U << 22, MVT::i32));
   3571   SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
   3572                               DAG.getConstant(22, MVT::i32));
   3573   return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
   3574                      DAG.getConstant(3, MVT::i32));
   3575 }
   3576 
   3577 static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
   3578                          const ARMSubtarget *ST) {
   3579   EVT VT = N->getValueType(0);
   3580   DebugLoc dl = N->getDebugLoc();
   3581 
   3582   if (!ST->hasV6T2Ops())
   3583     return SDValue();
   3584 
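           // cttz(x) == ctlz(rbit(x)): bit-reversal turns trailing zeros into leading
           // zeros, so RBIT followed by CLZ yields the trailing-zero count.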
   3585   SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
   3586   return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
   3587 }
   3588 
   3589 /// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
    3590 /// for each 16-bit element from the operand, repeated.  The basic idea is to
   3591 /// leverage vcnt to get the 8-bit counts, gather and add the results.
   3592 ///
   3593 /// Trace for v4i16:
   3594 /// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
   3595 /// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
   3596 /// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
   3597 /// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
   3598 ///            [b0 b1 b2 b3 b4 b5 b6 b7]
   3599 ///           +[b1 b0 b3 b2 b5 b4 b7 b6]
   3600 /// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
   3601 /// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
   3602 static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
   3603   EVT VT = N->getValueType(0);
   3604   DebugLoc DL = N->getDebugLoc();
   3605 
   3606   EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
   3607   SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
   3608   SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
   3609   SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
   3610   SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
   3611   return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
   3612 }
   3613 
   3614 /// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
   3615 /// bit-count for each 16-bit element from the operand.  We need slightly
   3616 /// different sequencing for v4i16 and v8i16 to stay within NEON's available
   3617 /// 64/128-bit registers.
   3618 ///
   3619 /// Trace for v4i16:
   3620 /// input           = [v0    v1    v2    v3    ] (vi 16-bit element)
   3621 /// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
   3622 /// v8i16:Extended  = [k0    k1    k2    k3    k0    k1    k2    k3    ]
   3623 /// v4i16:Extracted = [k0    k1    k2    k3    ]
   3624 static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
   3625   EVT VT = N->getValueType(0);
   3626   DebugLoc DL = N->getDebugLoc();
   3627 
   3628   SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
   3629   if (VT.is64BitVector()) {
   3630     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
   3631     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
   3632                        DAG.getIntPtrConstant(0));
   3633   } else {
   3634     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
   3635                                     BitCounts, DAG.getIntPtrConstant(0));
   3636     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
   3637   }
   3638 }
   3639 
   3640 /// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
   3641 /// bit-count for each 32-bit element from the operand.  The idea here is
   3642 /// to split the vector into 16-bit elements, leverage the 16-bit count
   3643 /// routine, and then combine the results.
   3644 ///
   3645 /// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
   3646 /// input    = [v0    v1    ] (vi: 32-bit elements)
   3647 /// Bitcast  = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
   3648 /// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
   3649 /// vrev: N0 = [k1 k0 k3 k2 ]
   3650 ///            [k0 k1 k2 k3 ]
   3651 ///       N1 =+[k1 k0 k3 k2 ]
   3652 ///            [k0 k2 k1 k3 ]
   3653 ///       N2 =+[k1 k3 k0 k2 ]
   3654 ///            [k0    k2    k1    k3    ]
   3655 /// Extended =+[k1    k3    k0    k2    ]
   3656 ///            [k0    k2    ]
   3657 /// Extracted=+[k1    k3    ]
   3658 ///
   3659 static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
   3660   EVT VT = N->getValueType(0);
   3661   DebugLoc DL = N->getDebugLoc();
   3662 
   3663   EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
   3664 
   3665   SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
   3666   SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
   3667   SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
   3668   SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
   3669   SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
   3670 
   3671   if (VT.is64BitVector()) {
   3672     SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
   3673     return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
   3674                        DAG.getIntPtrConstant(0));
   3675   } else {
   3676     SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
   3677                                     DAG.getIntPtrConstant(0));
   3678     return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
   3679   }
   3680 }
   3681 
   3682 static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
   3683                           const ARMSubtarget *ST) {
   3684   EVT VT = N->getValueType(0);
   3685 
   3686   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
   3687   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
   3688           VT == MVT::v4i16 || VT == MVT::v8i16) &&
   3689          "Unexpected type for custom ctpop lowering");
   3690 
   3691   if (VT.getVectorElementType() == MVT::i32)
   3692     return lowerCTPOP32BitElements(N, DAG);
   3693   else
   3694     return lowerCTPOP16BitElements(N, DAG);
   3695 }
   3696 
   3697 static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
   3698                           const ARMSubtarget *ST) {
   3699   EVT VT = N->getValueType(0);
   3700   DebugLoc dl = N->getDebugLoc();
   3701 
   3702   if (!VT.isVector())
   3703     return SDValue();
   3704 
   3705   // Lower vector shifts on NEON to use VSHL.
   3706   assert(ST->hasNEON() && "unexpected vector shift");
   3707 
   3708   // Left shifts translate directly to the vshiftu intrinsic.
   3709   if (N->getOpcode() == ISD::SHL)
   3710     return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3711                        DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
   3712                        N->getOperand(0), N->getOperand(1));
   3713 
   3714   assert((N->getOpcode() == ISD::SRA ||
   3715           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
   3716 
   3717   // NEON uses the same intrinsics for both left and right shifts.  For
   3718   // right shifts, the shift amounts are negative, so negate the vector of
   3719   // shift amounts.
   3720   EVT ShiftVT = N->getOperand(1).getValueType();
   3721   SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
   3722                                      getZeroVector(ShiftVT, DAG, dl),
   3723                                      N->getOperand(1));
   3724   Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
   3725                              Intrinsic::arm_neon_vshifts :
   3726                              Intrinsic::arm_neon_vshiftu);
   3727   return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
   3728                      DAG.getConstant(vshiftInt, MVT::i32),
   3729                      N->getOperand(0), NegatedCount);
   3730 }
   3731 
   3732 static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
   3733                                 const ARMSubtarget *ST) {
   3734   EVT VT = N->getValueType(0);
   3735   DebugLoc dl = N->getDebugLoc();
   3736 
   3737   // We can get here for a node like i32 = ISD::SHL i32, i64
   3738   if (VT != MVT::i64)
   3739     return SDValue();
   3740 
   3741   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
   3742          "Unknown shift to lower!");
   3743 
    3744   // We only lower SRA and SRL by 1 here; all others use generic lowering.
   3745   if (!isa<ConstantSDNode>(N->getOperand(1)) ||
   3746       cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
   3747     return SDValue();
   3748 
    3749   // If we are in Thumb1 mode, we don't have RRX.
   3750   if (ST->isThumb1Only()) return SDValue();
   3751 
   3752   // Okay, we have a 64-bit SRA or SRL of 1.  Lower this to an RRX expr.
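           // For a logical shift, the high word becomes Hi >> 1 with the old bit 0 of
           // Hi captured as the carry, and the low word becomes Lo >> 1 with that
           // carry rotated in as the new bit 31.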
   3753   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
   3754                            DAG.getConstant(0, MVT::i32));
   3755   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
   3756                            DAG.getConstant(1, MVT::i32));
   3757 
    3758   // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
    3759   // captures the shifted-out bit in the carry flag.
   3760   unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
   3761   Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);
   3762 
   3763   // The low part is an ARMISD::RRX operand, which shifts the carry in.
   3764   Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
   3765 
   3766   // Merge the pieces into a single i64 value.
    3767   return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
   3768 }
   3769 
   3770 static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
   3771   SDValue TmpOp0, TmpOp1;
   3772   bool Invert = false;
   3773   bool Swap = false;
   3774   unsigned Opc = 0;
   3775 
   3776   SDValue Op0 = Op.getOperand(0);
   3777   SDValue Op1 = Op.getOperand(1);
   3778   SDValue CC = Op.getOperand(2);
   3779   EVT VT = Op.getValueType();
   3780   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
   3781   DebugLoc dl = Op.getDebugLoc();
   3782 
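           // NEON only provides VCEQ, VCGE and VCGT (plus unsigned variants), so the
           // remaining predicates are formed by swapping the operands and/or inverting
           // the result; e.g., SETLE becomes VCGE with swapped operands, and SETNE is
           // VCEQ followed by a NOT.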
   3783   if (Op.getOperand(1).getValueType().isFloatingPoint()) {
   3784     switch (SetCCOpcode) {
   3785     default: llvm_unreachable("Illegal FP comparison");
   3786     case ISD::SETUNE:
   3787     case ISD::SETNE:  Invert = true; // Fallthrough
   3788     case ISD::SETOEQ:
   3789     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
   3790     case ISD::SETOLT:
   3791     case ISD::SETLT: Swap = true; // Fallthrough
   3792     case ISD::SETOGT:
   3793     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
   3794     case ISD::SETOLE:
   3795     case ISD::SETLE:  Swap = true; // Fallthrough
   3796     case ISD::SETOGE:
   3797     case ISD::SETGE: Opc = ARMISD::VCGE; break;
   3798     case ISD::SETUGE: Swap = true; // Fallthrough
   3799     case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
   3800     case ISD::SETUGT: Swap = true; // Fallthrough
   3801     case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
   3802     case ISD::SETUEQ: Invert = true; // Fallthrough
   3803     case ISD::SETONE:
   3804       // Expand this to (OLT | OGT).
   3805       TmpOp0 = Op0;
   3806       TmpOp1 = Op1;
   3807       Opc = ISD::OR;
   3808       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
   3809       Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1);
   3810       break;
   3811     case ISD::SETUO: Invert = true; // Fallthrough
   3812     case ISD::SETO:
   3813       // Expand this to (OLT | OGE).
   3814       TmpOp0 = Op0;
   3815       TmpOp1 = Op1;
   3816       Opc = ISD::OR;
   3817       Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0);
   3818       Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1);
   3819       break;
   3820     }
   3821   } else {
   3822     // Integer comparisons.
   3823     switch (SetCCOpcode) {
   3824     default: llvm_unreachable("Illegal integer comparison");
    3825     case ISD::SETNE:  Invert = true; // Fallthrough
    3826     case ISD::SETEQ:  Opc = ARMISD::VCEQ; break;
    3827     case ISD::SETLT:  Swap = true; // Fallthrough
    3828     case ISD::SETGT:  Opc = ARMISD::VCGT; break;
    3829     case ISD::SETLE:  Swap = true; // Fallthrough
    3830     case ISD::SETGE:  Opc = ARMISD::VCGE; break;
    3831     case ISD::SETULT: Swap = true; // Fallthrough
    3832     case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
    3833     case ISD::SETULE: Swap = true; // Fallthrough
    3834     case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
   3835     }
   3836 
   3837     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
   3838     if (Opc == ARMISD::VCEQ) {
   3839 
   3840       SDValue AndOp;
   3841       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
   3842         AndOp = Op0;
   3843       else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
   3844         AndOp = Op1;
   3845 
   3846       // Ignore bitconvert.
   3847       if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
   3848         AndOp = AndOp.getOperand(0);
   3849 
   3850       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
   3851         Opc = ARMISD::VTST;
   3852         Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
   3853         Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
   3854         Invert = !Invert;
   3855       }
   3856     }
   3857   }
   3858 
   3859   if (Swap)
   3860     std::swap(Op0, Op1);
   3861 
   3862   // If one of the operands is a constant vector zero, attempt to fold the
   3863   // comparison to a specialized compare-against-zero form.
   3864   SDValue SingleOp;
   3865   if (ISD::isBuildVectorAllZeros(Op1.getNode()))
   3866     SingleOp = Op0;
   3867   else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
   3868     if (Opc == ARMISD::VCGE)
   3869       Opc = ARMISD::VCLEZ;
   3870     else if (Opc == ARMISD::VCGT)
   3871       Opc = ARMISD::VCLTZ;
   3872     SingleOp = Op1;
   3873   }
   3874 
   3875   SDValue Result;
   3876   if (SingleOp.getNode()) {
   3877     switch (Opc) {
   3878     case ARMISD::VCEQ:
   3879       Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
   3880     case ARMISD::VCGE:
   3881       Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
   3882     case ARMISD::VCLEZ:
   3883       Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
   3884     case ARMISD::VCGT:
   3885       Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
   3886     case ARMISD::VCLTZ:
   3887       Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
   3888     default:
   3889       Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
   3890     }
   3891   } else {
    3892     Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
   3893   }
   3894 
   3895   if (Invert)
   3896     Result = DAG.getNOT(dl, Result, VT);
   3897 
   3898   return Result;
   3899 }
   3900 
   3901 /// isNEONModifiedImm - Check if the specified splat value corresponds to a
   3902 /// valid vector constant for a NEON instruction with a "modified immediate"
   3903 /// operand (e.g., VMOV).  If so, return the encoded value.
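         /// For example, a 32-bit splat of 0x00005600 is encodable as OpCmode = 0x2
         /// with Imm = 0x56 (the 0x0000nn00 form below), whereas 0x00123400 has two
         /// nonzero bytes and is rejected.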
   3904 static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
   3905                                  unsigned SplatBitSize, SelectionDAG &DAG,
   3906                                  EVT &VT, bool is128Bits, NEONModImmType type) {
   3907   unsigned OpCmode, Imm;
   3908 
   3909   // SplatBitSize is set to the smallest size that splats the vector, so a
   3910   // zero vector will always have SplatBitSize == 8.  However, NEON modified
    3911   // immediate instructions other than VMOV do not support the 8-bit encoding
   3912   // of a zero vector, and the default encoding of zero is supposed to be the
   3913   // 32-bit version.
   3914   if (SplatBits == 0)
   3915     SplatBitSize = 32;
   3916 
   3917   switch (SplatBitSize) {
   3918   case 8:
   3919     if (type != VMOVModImm)
   3920       return SDValue();
   3921     // Any 1-byte value is OK.  Op=0, Cmode=1110.
   3922     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
   3923     OpCmode = 0xe;
   3924     Imm = SplatBits;
   3925     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
   3926     break;
   3927 
   3928   case 16:
   3929     // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
   3930     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
   3931     if ((SplatBits & ~0xff) == 0) {
   3932       // Value = 0x00nn: Op=x, Cmode=100x.
   3933       OpCmode = 0x8;
   3934       Imm = SplatBits;
   3935       break;
   3936     }
   3937     if ((SplatBits & ~0xff00) == 0) {
   3938       // Value = 0xnn00: Op=x, Cmode=101x.
   3939       OpCmode = 0xa;
   3940       Imm = SplatBits >> 8;
   3941       break;
   3942     }
   3943     return SDValue();
   3944 
   3945   case 32:
   3946     // NEON's 32-bit VMOV supports splat values where:
   3947     // * only one byte is nonzero, or
   3948     // * the least significant byte is 0xff and the second byte is nonzero, or
   3949     // * the least significant 2 bytes are 0xff and the third is nonzero.
   3950     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
   3951     if ((SplatBits & ~0xff) == 0) {
   3952       // Value = 0x000000nn: Op=x, Cmode=000x.
   3953       OpCmode = 0;
   3954       Imm = SplatBits;
   3955       break;
   3956     }
   3957     if ((SplatBits & ~0xff00) == 0) {
   3958       // Value = 0x0000nn00: Op=x, Cmode=001x.
   3959       OpCmode = 0x2;
   3960       Imm = SplatBits >> 8;
   3961       break;
   3962     }
   3963     if ((SplatBits & ~0xff0000) == 0) {
   3964       // Value = 0x00nn0000: Op=x, Cmode=010x.
   3965       OpCmode = 0x4;
   3966       Imm = SplatBits >> 16;
   3967       break;
   3968     }
   3969     if ((SplatBits & ~0xff000000) == 0) {
   3970       // Value = 0xnn000000: Op=x, Cmode=011x.
   3971       OpCmode = 0x6;
   3972       Imm = SplatBits >> 24;
   3973       break;
   3974     }
   3975 
   3976     // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
   3977     if (type == OtherModImm) return SDValue();
   3978 
   3979     if ((SplatBits & ~0xffff) == 0 &&
   3980         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
   3981       // Value = 0x0000nnff: Op=x, Cmode=1100.
   3982       OpCmode = 0xc;
   3983       Imm = SplatBits >> 8;
   3984       SplatBits |= 0xff;
   3985       break;
   3986     }
   3987 
   3988     if ((SplatBits & ~0xffffff) == 0 &&
   3989         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
   3990       // Value = 0x00nnffff: Op=x, Cmode=1101.
   3991       OpCmode = 0xd;
   3992       Imm = SplatBits >> 16;
   3993       SplatBits |= 0xffff;
   3994       break;
   3995     }
   3996 
   3997     // Note: there are a few 32-bit splat values (specifically: 00ffff00,
   3998     // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
   3999     // VMOV.I32.  A (very) minor optimization would be to replicate the value
   4000     // and fall through here to test for a valid 64-bit splat.  But, then the
   4001     // caller would also need to check and handle the change in size.
   4002     return SDValue();
   4003 
   4004   case 64: {
   4005     if (type != VMOVModImm)
   4006       return SDValue();
   4007     // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
   4008     uint64_t BitMask = 0xff;
   4009     uint64_t Val = 0;
   4010     unsigned ImmMask = 1;
   4011     Imm = 0;
   4012     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
   4013       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
   4014         Val |= BitMask;
   4015         Imm |= ImmMask;
   4016       } else if ((SplatBits & BitMask) != 0) {
   4017         return SDValue();
   4018       }
   4019       BitMask <<= 8;
   4020       ImmMask <<= 1;
   4021     }
   4022     // Op=1, Cmode=1110.
   4023     OpCmode = 0x1e;
   4024     SplatBits = Val;
   4025     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
   4026     break;
   4027   }
   4028 
   4029   default:
   4030     llvm_unreachable("unexpected size for isNEONModifiedImm");
   4031   }
   4032 
   4033   unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
   4034   return DAG.getTargetConstant(EncodedVal, MVT::i32);
   4035 }
   4036 
   4037 SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
   4038                                            const ARMSubtarget *ST) const {
   4039   if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16())
   4040     return SDValue();
   4041 
   4042   ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
   4043   assert(Op.getValueType() == MVT::f32 &&
   4044          "ConstantFP custom lowering should only occur for f32.");
   4045 
   4046   // Try splatting with a VMOV.f32...
   4047   APFloat FPVal = CFP->getValueAPF();
   4048   int ImmVal = ARM_AM::getFP32Imm(FPVal);
   4049   if (ImmVal != -1) {
   4050     DebugLoc DL = Op.getDebugLoc();
   4051     SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32);
   4052     SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
   4053                                       NewVal);
   4054     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
   4055                        DAG.getConstant(0, MVT::i32));
   4056   }
   4057 
   4058   // If that fails, try a VMOV.i32
   4059   EVT VMovVT;
   4060   unsigned iVal = FPVal.bitcastToAPInt().getZExtValue();
   4061   SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false,
   4062                                      VMOVModImm);
   4063   if (NewVal != SDValue()) {
   4064     DebugLoc DL = Op.getDebugLoc();
   4065     SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
   4066                                       NewVal);
   4067     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
   4068                                        VecConstant);
   4069     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
   4070                        DAG.getConstant(0, MVT::i32));
   4071   }
   4072 
   4073   // Finally, try a VMVN.i32
   4074   NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false,
   4075                              VMVNModImm);
   4076   if (NewVal != SDValue()) {
   4077     DebugLoc DL = Op.getDebugLoc();
   4078     SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
   4079     SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
   4080                                        VecConstant);
   4081     return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
   4082                        DAG.getConstant(0, MVT::i32));
   4083   }
   4084 
   4085   return SDValue();
   4086 }
   4087 
    4088 // Check if a VEXT instruction can handle the shuffle mask when the
    4089 // vector sources of the shuffle are the same.
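         // For example, for v8i8 the mask <3, 4, 5, 6, 7, 0, 1, 2> is a singleton VEXT
         // with Imm == 3; the indices simply wrap around the single source vector.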
   4090 static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
   4091   unsigned NumElts = VT.getVectorNumElements();
   4092 
   4093   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
   4094   if (M[0] < 0)
   4095     return false;
   4096 
   4097   Imm = M[0];
   4098 
   4099   // If this is a VEXT shuffle, the immediate value is the index of the first
   4100   // element.  The other shuffle indices must be the successive elements after
   4101   // the first one.
   4102   unsigned ExpectedElt = Imm;
   4103   for (unsigned i = 1; i < NumElts; ++i) {
   4104     // Increment the expected index.  If it wraps around, just follow it
   4105     // back to index zero and keep going.
   4106     ++ExpectedElt;
   4107     if (ExpectedElt == NumElts)
   4108       ExpectedElt = 0;
   4109 
   4110     if (M[i] < 0) continue; // ignore UNDEF indices
   4111     if (ExpectedElt != static_cast<unsigned>(M[i]))
   4112       return false;
   4113   }
   4114 
   4115   return true;
   4116 }
   4117 
   4118 
   4119 static bool isVEXTMask(ArrayRef<int> M, EVT VT,
   4120                        bool &ReverseVEXT, unsigned &Imm) {
   4121   unsigned NumElts = VT.getVectorNumElements();
   4122   ReverseVEXT = false;
   4123 
   4124   // Assume that the first shuffle index is not UNDEF.  Fail if it is.
   4125   if (M[0] < 0)
   4126     return false;
   4127 
   4128   Imm = M[0];
   4129 
   4130   // If this is a VEXT shuffle, the immediate value is the index of the first
   4131   // element.  The other shuffle indices must be the successive elements after
   4132   // the first one.
   4133   unsigned ExpectedElt = Imm;
   4134   for (unsigned i = 1; i < NumElts; ++i) {
   4135     // Increment the expected index.  If it wraps around, it may still be
   4136     // a VEXT but the source vectors must be swapped.
   4137     ExpectedElt += 1;
   4138     if (ExpectedElt == NumElts * 2) {
   4139       ExpectedElt = 0;
   4140       ReverseVEXT = true;
   4141     }
   4142 
   4143     if (M[i] < 0) continue; // ignore UNDEF indices
   4144     if (ExpectedElt != static_cast<unsigned>(M[i]))
   4145       return false;
   4146   }
   4147 
   4148   // Adjust the index value if the source operands will be swapped.
   4149   if (ReverseVEXT)
   4150     Imm -= NumElts;
   4151 
   4152   return true;
   4153 }
   4154 
   4155 /// isVREVMask - Check if a vector shuffle corresponds to a VREV
   4156 /// instruction with the specified blocksize.  (The order of the elements
   4157 /// within each block of the vector is reversed.)
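         /// For example, <3, 2, 1, 0> on v4i16 reverses one 64-bit block
         /// (BlockElts == 4), so it is a VREV64.16 mask.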
   4158 static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
   4159   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
   4160          "Only possible block sizes for VREV are: 16, 32, 64");
   4161 
   4162   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4163   if (EltSz == 64)
   4164     return false;
   4165 
   4166   unsigned NumElts = VT.getVectorNumElements();
   4167   unsigned BlockElts = M[0] + 1;
   4168   // If the first shuffle index is UNDEF, be optimistic.
   4169   if (M[0] < 0)
   4170     BlockElts = BlockSize / EltSz;
   4171 
   4172   if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
   4173     return false;
   4174 
   4175   for (unsigned i = 0; i < NumElts; ++i) {
   4176     if (M[i] < 0) continue; // ignore UNDEF indices
   4177     if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
   4178       return false;
   4179   }
   4180 
   4181   return true;
   4182 }
   4183 
   4184 static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
   4185   // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
   4186   // range, then 0 is placed into the resulting vector. So pretty much any mask
   4187   // of 8 elements can work here.
   4188   return VT == MVT::v8i8 && M.size() == 8;
   4189 }
   4190 
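         /// isVTRNMask - Check if a vector shuffle corresponds to a VTRN (transpose)
         /// operation, setting WhichResult to the half of the VTRN result to use.
         /// For example, <0, 4, 2, 6> on v4i32 selects result 0 and <1, 5, 3, 7>
         /// selects result 1.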
   4191 static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   4192   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4193   if (EltSz == 64)
   4194     return false;
   4195 
   4196   unsigned NumElts = VT.getVectorNumElements();
   4197   WhichResult = (M[0] == 0 ? 0 : 1);
   4198   for (unsigned i = 0; i < NumElts; i += 2) {
   4199     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
   4200         (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult))
   4201       return false;
   4202   }
   4203   return true;
   4204 }
   4205 
   4206 /// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
   4207 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
   4208 /// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
   4209 static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
   4210   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4211   if (EltSz == 64)
   4212     return false;
   4213 
   4214   unsigned NumElts = VT.getVectorNumElements();
   4215   WhichResult = (M[0] == 0 ? 0 : 1);
   4216   for (unsigned i = 0; i < NumElts; i += 2) {
   4217     if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) ||
   4218         (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult))
   4219       return false;
   4220   }
   4221   return true;
   4222 }
   4223 
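         /// isVUZPMask - Check if a vector shuffle corresponds to a VUZP (unzip)
         /// operation.  For example, <0, 2, 4, 6, 8, 10, 12, 14> on v8i8 gathers the
         /// even-indexed elements of both sources (result 0).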
   4224 static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   4225   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4226   if (EltSz == 64)
   4227     return false;
   4228 
   4229   unsigned NumElts = VT.getVectorNumElements();
   4230   WhichResult = (M[0] == 0 ? 0 : 1);
   4231   for (unsigned i = 0; i != NumElts; ++i) {
   4232     if (M[i] < 0) continue; // ignore UNDEF indices
   4233     if ((unsigned) M[i] != 2 * i + WhichResult)
   4234       return false;
   4235   }
   4236 
   4237   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4238   if (VT.is64BitVector() && EltSz == 32)
   4239     return false;
   4240 
   4241   return true;
   4242 }
   4243 
   4244 /// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
   4245 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
    4246 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
   4247 static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
   4248   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4249   if (EltSz == 64)
   4250     return false;
   4251 
   4252   unsigned Half = VT.getVectorNumElements() / 2;
   4253   WhichResult = (M[0] == 0 ? 0 : 1);
   4254   for (unsigned j = 0; j != 2; ++j) {
   4255     unsigned Idx = WhichResult;
   4256     for (unsigned i = 0; i != Half; ++i) {
   4257       int MIdx = M[i + j * Half];
   4258       if (MIdx >= 0 && (unsigned) MIdx != Idx)
   4259         return false;
   4260       Idx += 2;
   4261     }
   4262   }
   4263 
   4264   // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4265   if (VT.is64BitVector() && EltSz == 32)
   4266     return false;
   4267 
   4268   return true;
   4269 }
   4270 
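         /// isVZIPMask - Check if a vector shuffle corresponds to a VZIP (interleave)
         /// operation.  For example, <0, 8, 1, 9, 2, 10, 3, 11> on v8i8 interleaves
         /// the low halves of the two sources (result 0).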
   4271 static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
   4272   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4273   if (EltSz == 64)
   4274     return false;
   4275 
   4276   unsigned NumElts = VT.getVectorNumElements();
   4277   WhichResult = (M[0] == 0 ? 0 : 1);
   4278   unsigned Idx = WhichResult * NumElts / 2;
   4279   for (unsigned i = 0; i != NumElts; i += 2) {
   4280     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
   4281         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
   4282       return false;
   4283     Idx += 1;
   4284   }
   4285 
   4286   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4287   if (VT.is64BitVector() && EltSz == 32)
   4288     return false;
   4289 
   4290   return true;
   4291 }
   4292 
   4293 /// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
   4294 /// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
   4295 /// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
   4296 static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
   4297   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
   4298   if (EltSz == 64)
   4299     return false;
   4300 
   4301   unsigned NumElts = VT.getVectorNumElements();
   4302   WhichResult = (M[0] == 0 ? 0 : 1);
   4303   unsigned Idx = WhichResult * NumElts / 2;
   4304   for (unsigned i = 0; i != NumElts; i += 2) {
   4305     if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
   4306         (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
   4307       return false;
   4308     Idx += 1;
   4309   }
   4310 
   4311   // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
   4312   if (VT.is64BitVector() && EltSz == 32)
   4313     return false;
   4314 
   4315   return true;
   4316 }
   4317 
    4318 /// \return true if this is a reverse operation on a vector.
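         /// For example, <7, 6, 5, 4, 3, 2, 1, 0> is a reverse mask for v8i8.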
   4319 static bool isReverseMask(ArrayRef<int> M, EVT VT) {
   4320   unsigned NumElts = VT.getVectorNumElements();
   4321   // Make sure the mask has the right size.
   4322   if (NumElts != M.size())
   4323       return false;
   4324 
   4325   // Look for <15, ..., 3, -1, 1, 0>.
   4326   for (unsigned i = 0; i != NumElts; ++i)
   4327     if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
   4328       return false;
   4329 
   4330   return true;
   4331 }
   4332 
   4333 // If N is an integer constant that can be moved into a register in one
   4334 // instruction, return an SDValue of such a constant (will become a MOV
   4335 // instruction).  Otherwise return null.
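         // For example, in ARM mode 0xff000000 is a valid modified immediate (an 8-bit
         // value rotated by an even amount) and needs only one MOV, so it is accepted;
         // 0x12345678 is not, and a null SDValue is returned for it.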
   4336 static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
   4337                                      const ARMSubtarget *ST, DebugLoc dl) {
   4338   uint64_t Val;
   4339   if (!isa<ConstantSDNode>(N))
   4340     return SDValue();
   4341   Val = cast<ConstantSDNode>(N)->getZExtValue();
   4342 
   4343   if (ST->isThumb1Only()) {
   4344     if (Val <= 255 || ~Val <= 255)
   4345       return DAG.getConstant(Val, MVT::i32);
   4346   } else {
   4347     if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
   4348       return DAG.getConstant(Val, MVT::i32);
   4349   }
   4350   return SDValue();
   4351 }
   4352 
   4353 // If this is a case we can't handle, return null and let the default
   4354 // expansion code take care of it.
   4355 SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   4356                                              const ARMSubtarget *ST) const {
   4357   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
   4358   DebugLoc dl = Op.getDebugLoc();
   4359   EVT VT = Op.getValueType();
   4360 
   4361   APInt SplatBits, SplatUndef;
   4362   unsigned SplatBitSize;
   4363   bool HasAnyUndefs;
   4364   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
   4365     if (SplatBitSize <= 64) {
   4366       // Check if an immediate VMOV works.
   4367       EVT VmovVT;
   4368       SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
   4369                                       SplatUndef.getZExtValue(), SplatBitSize,
   4370                                       DAG, VmovVT, VT.is128BitVector(),
   4371                                       VMOVModImm);
   4372       if (Val.getNode()) {
   4373         SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
   4374         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   4375       }
   4376 
   4377       // Try an immediate VMVN.
   4378       uint64_t NegatedImm = (~SplatBits).getZExtValue();
   4379       Val = isNEONModifiedImm(NegatedImm,
   4380                                       SplatUndef.getZExtValue(), SplatBitSize,
   4381                                       DAG, VmovVT, VT.is128BitVector(),
   4382                                       VMVNModImm);
   4383       if (Val.getNode()) {
   4384         SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
   4385         return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
   4386       }
   4387 
   4388       // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
   4389       if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
   4390         int ImmVal = ARM_AM::getFP32Imm(SplatBits);
   4391         if (ImmVal != -1) {
   4392           SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
   4393           return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
   4394         }
   4395       }
   4396     }
   4397   }
   4398 
   4399   // Scan through the operands to see if only one value is used.
   4400   //
   4401   // As an optimisation, even if more than one value is used it may be more
    4402   // profitable to splat with one value and then change some lanes.
   4403   //
   4404   // Heuristically we decide to do this if the vector has a "dominant" value,
   4405   // defined as splatted to more than half of the lanes.
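           // For example, <a, a, a, b> has the dominant value 'a'; when the values are
           // not constants, we VDUP 'a' and then rewrite the remaining lane with
           // INSERT_VECTOR_ELT.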
   4406   unsigned NumElts = VT.getVectorNumElements();
   4407   bool isOnlyLowElement = true;
   4408   bool usesOnlyOneValue = true;
   4409   bool hasDominantValue = false;
   4410   bool isConstant = true;
   4411 
   4412   // Map of the number of times a particular SDValue appears in the
   4413   // element list.
   4414   DenseMap<SDValue, unsigned> ValueCounts;
   4415   SDValue Value;
   4416   for (unsigned i = 0; i < NumElts; ++i) {
   4417     SDValue V = Op.getOperand(i);
   4418     if (V.getOpcode() == ISD::UNDEF)
   4419       continue;
   4420     if (i > 0)
   4421       isOnlyLowElement = false;
   4422     if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
   4423       isConstant = false;
   4424 
   4425     ValueCounts.insert(std::make_pair(V, 0));
   4426     unsigned &Count = ValueCounts[V];
   4427 
   4428     // Is this value dominant? (takes up more than half of the lanes)
   4429     if (++Count > (NumElts / 2)) {
   4430       hasDominantValue = true;
   4431       Value = V;
   4432     }
   4433   }
   4434   if (ValueCounts.size() != 1)
   4435     usesOnlyOneValue = false;
   4436   if (!Value.getNode() && ValueCounts.size() > 0)
   4437     Value = ValueCounts.begin()->first;
   4438 
   4439   if (ValueCounts.size() == 0)
   4440     return DAG.getUNDEF(VT);
   4441 
   4442   if (isOnlyLowElement)
   4443     return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
   4444 
   4445   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   4446 
   4447   // Use VDUP for non-constant splats.  For f32 constant splats, reduce to
   4448   // i32 and try again.
   4449   if (hasDominantValue && EltSize <= 32) {
   4450     if (!isConstant) {
   4451       SDValue N;
   4452 
   4453       // If we are VDUPing a value that comes directly from a vector, that will
   4454       // cause an unnecessary move to and from a GPR, where instead we could
   4455       // just use VDUPLANE. We can only do this if the lane being extracted
   4456       // is at a constant index, as the VDUP from lane instructions only have
   4457       // constant-index forms.
   4458       if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   4459           isa<ConstantSDNode>(Value->getOperand(1))) {
   4460         // We need to create a new undef vector to use for the VDUPLANE if the
   4461         // size of the vector from which we get the value is different than the
   4462         // size of the vector that we need to create. We will insert the element
   4463         // such that the register coalescer will remove unnecessary copies.
   4464         if (VT != Value->getOperand(0).getValueType()) {
   4465           ConstantSDNode *constIndex;
   4466           constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1));
   4467           assert(constIndex && "The index is not a constant!");
   4468           unsigned index = constIndex->getAPIntValue().getLimitedValue() %
   4469                              VT.getVectorNumElements();
   4470           N =  DAG.getNode(ARMISD::VDUPLANE, dl, VT,
   4471                  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
   4472                         Value, DAG.getConstant(index, MVT::i32)),
   4473                            DAG.getConstant(index, MVT::i32));
   4474         } else
   4475           N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
   4476                         Value->getOperand(0), Value->getOperand(1));
   4477       } else
   4478         N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
   4479 
   4480       if (!usesOnlyOneValue) {
   4481         // The dominant value was splatted as 'N', but we now have to insert
   4482         // all differing elements.
   4483         for (unsigned I = 0; I < NumElts; ++I) {
   4484           if (Op.getOperand(I) == Value)
   4485             continue;
   4486           SmallVector<SDValue, 3> Ops;
   4487           Ops.push_back(N);
   4488           Ops.push_back(Op.getOperand(I));
   4489           Ops.push_back(DAG.getConstant(I, MVT::i32));
   4490           N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3);
   4491         }
   4492       }
   4493       return N;
   4494     }
   4495     if (VT.getVectorElementType().isFloatingPoint()) {
   4496       SmallVector<SDValue, 8> Ops;
   4497       for (unsigned i = 0; i < NumElts; ++i)
   4498         Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
   4499                                   Op.getOperand(i)));
   4500       EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
   4501       SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts);
   4502       Val = LowerBUILD_VECTOR(Val, DAG, ST);
   4503       if (Val.getNode())
   4504         return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   4505     }
   4506     if (usesOnlyOneValue) {
   4507       SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
   4508       if (isConstant && Val.getNode())
   4509         return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
   4510     }
   4511   }
   4512 
   4513   // If all elements are constants and the case above didn't get hit, fall back
   4514   // to the default expansion, which will generate a load from the constant
   4515   // pool.
   4516   if (isConstant)
   4517     return SDValue();
   4518 
   4519   // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
   4520   if (NumElts >= 4) {
   4521     SDValue shuffle = ReconstructShuffle(Op, DAG);
   4522     if (shuffle != SDValue())
   4523       return shuffle;
   4524   }
   4525 
   4526   // Vectors with 32- or 64-bit elements can be built by directly assigning
   4527   // the subregisters.  Lower it to an ARMISD::BUILD_VECTOR so the operands
   4528   // will be legalized.
   4529   if (EltSize >= 32) {
   4530     // Do the expansion with floating-point types, since that is what the VFP
   4531     // registers are defined to use, and since i64 is not legal.
   4532     EVT EltVT = EVT::getFloatingPointVT(EltSize);
   4533     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
   4534     SmallVector<SDValue, 8> Ops;
   4535     for (unsigned i = 0; i < NumElts; ++i)
   4536       Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
   4537     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
   4538     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   4539   }
   4540 
   4541   return SDValue();
   4542 }
   4543 
   4544 // Gather data to see if the operation can be modelled as a
   4545 // shuffle in combination with VEXTs.
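         // For example, a v4i16 built from elements 2..5 of one v8i16 source can be
         // lowered to a VEXT of the source's two halves with an immediate of 2,
         // followed by a trivial shuffle of that result.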
   4546 SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
   4547                                               SelectionDAG &DAG) const {
   4548   DebugLoc dl = Op.getDebugLoc();
   4549   EVT VT = Op.getValueType();
   4550   unsigned NumElts = VT.getVectorNumElements();
   4551 
   4552   SmallVector<SDValue, 2> SourceVecs;
   4553   SmallVector<unsigned, 2> MinElts;
   4554   SmallVector<unsigned, 2> MaxElts;
   4555 
   4556   for (unsigned i = 0; i < NumElts; ++i) {
   4557     SDValue V = Op.getOperand(i);
   4558     if (V.getOpcode() == ISD::UNDEF)
   4559       continue;
   4560     else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
   4561       // A shuffle can only come from building a vector from various
   4562       // elements of other vectors.
   4563       return SDValue();
   4564     } else if (V.getOperand(0).getValueType().getVectorElementType() !=
   4565                VT.getVectorElementType()) {
   4566       // This code doesn't know how to handle shuffles where the vector
   4567       // element types do not match (this happens because type legalization
   4568       // promotes the return type of EXTRACT_VECTOR_ELT).
   4569       // FIXME: It might be appropriate to extend this code to handle
   4570       // mismatched types.
   4571       return SDValue();
   4572     }
   4573 
   4574     // Record this extraction against the appropriate vector if possible...
   4575     SDValue SourceVec = V.getOperand(0);
   4576     // If the element number isn't a constant, we can't effectively
   4577     // analyze what's going on.
   4578     if (!isa<ConstantSDNode>(V.getOperand(1)))
   4579       return SDValue();
   4580     unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
   4581     bool FoundSource = false;
   4582     for (unsigned j = 0; j < SourceVecs.size(); ++j) {
   4583       if (SourceVecs[j] == SourceVec) {
   4584         if (MinElts[j] > EltNo)
   4585           MinElts[j] = EltNo;
   4586         if (MaxElts[j] < EltNo)
   4587           MaxElts[j] = EltNo;
   4588         FoundSource = true;
   4589         break;
   4590       }
   4591     }
   4592 
   4593     // Or record a new source if not...
   4594     if (!FoundSource) {
   4595       SourceVecs.push_back(SourceVec);
   4596       MinElts.push_back(EltNo);
   4597       MaxElts.push_back(EltNo);
   4598     }
   4599   }
   4600 
    4601   // Currently we only do something sane when at most two source vectors
    4602   // are involved.
   4603   if (SourceVecs.size() > 2)
   4604     return SDValue();
   4605 
   4606   SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
   4607   int VEXTOffsets[2] = {0, 0};
   4608 
   4609   // This loop extracts the usage patterns of the source vectors
   4610   // and prepares appropriate SDValues for a shuffle if possible.
   4611   for (unsigned i = 0; i < SourceVecs.size(); ++i) {
   4612     if (SourceVecs[i].getValueType() == VT) {
   4613       // No VEXT necessary
   4614       ShuffleSrcs[i] = SourceVecs[i];
   4615       VEXTOffsets[i] = 0;
   4616       continue;
   4617     } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
   4618       // It probably isn't worth padding out a smaller vector just to
   4619       // break it down again in a shuffle.
   4620       return SDValue();
   4621     }
   4622 
   4623     // Since only 64-bit and 128-bit vectors are legal on ARM and
   4624     // we've eliminated the other cases...
   4625     assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
   4626            "unexpected vector sizes in ReconstructShuffle");
   4627 
   4628     if (MaxElts[i] - MinElts[i] >= NumElts) {
   4629       // Span too large for a VEXT to cope
   4630       return SDValue();
   4631     }
   4632 
   4633     if (MinElts[i] >= NumElts) {
   4634       // The extraction can just take the second half
   4635       VEXTOffsets[i] = NumElts;
   4636       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4637                                    SourceVecs[i],
   4638                                    DAG.getIntPtrConstant(NumElts));
   4639     } else if (MaxElts[i] < NumElts) {
   4640       // The extraction can just take the first half
   4641       VEXTOffsets[i] = 0;
   4642       ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4643                                    SourceVecs[i],
   4644                                    DAG.getIntPtrConstant(0));
   4645     } else {
   4646       // An actual VEXT is needed
   4647       VEXTOffsets[i] = MinElts[i];
   4648       SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4649                                      SourceVecs[i],
   4650                                      DAG.getIntPtrConstant(0));
   4651       SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
   4652                                      SourceVecs[i],
   4653                                      DAG.getIntPtrConstant(NumElts));
   4654       ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
   4655                                    DAG.getConstant(VEXTOffsets[i], MVT::i32));
   4656     }
   4657   }
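           // An illustrative example (not part of the original comments): building
           // a v4i16 from lanes 2..5 of one v8i16 source takes the VEXT path above,
           // so ShuffleSrcs[0] holds lanes 2..5 and VEXTOffsets[0] is 2; the mask
           // built below then comes out as <0, 1, 2, 3>.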
   4658 
   4659   SmallVector<int, 8> Mask;
   4660 
   4661   for (unsigned i = 0; i < NumElts; ++i) {
   4662     SDValue Entry = Op.getOperand(i);
   4663     if (Entry.getOpcode() == ISD::UNDEF) {
   4664       Mask.push_back(-1);
   4665       continue;
   4666     }
   4667 
   4668     SDValue ExtractVec = Entry.getOperand(0);
   4669     int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
   4670                                           .getOperand(1))->getSExtValue();
   4671     if (ExtractVec == SourceVecs[0]) {
   4672       Mask.push_back(ExtractElt - VEXTOffsets[0]);
   4673     } else {
   4674       Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
   4675     }
   4676   }
   4677 
   4678   // Final check before we try to produce nonsense...
   4679   if (isShuffleMaskLegal(Mask, VT))
   4680     return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
   4681                                 &Mask[0]);
   4682 
   4683   return SDValue();
   4684 }
   4685 
   4686 /// isShuffleMaskLegal - Targets can use this to indicate that they only
   4687 /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
   4688 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
   4689 /// are assumed to be legal.
   4690 bool
   4691 ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
   4692                                       EVT VT) const {
   4693   if (VT.getVectorNumElements() == 4 &&
   4694       (VT.is128BitVector() || VT.is64BitVector())) {
   4695     unsigned PFIndexes[4];
   4696     for (unsigned i = 0; i != 4; ++i) {
   4697       if (M[i] < 0)
   4698         PFIndexes[i] = 8;
   4699       else
   4700         PFIndexes[i] = M[i];
   4701     }
   4702 
   4703     // Compute the index in the perfect shuffle table.
   4704     unsigned PFTableIndex =
   4705       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
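             // E.g. (illustrative only) a mask of <1, 2, 3, u> gives PFIndexes of
             // <1, 2, 3, 8> and PFTableIndex = 1*729 + 2*81 + 3*9 + 8 = 926.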
   4706     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
   4707     unsigned Cost = (PFEntry >> 30);
   4708 
   4709     if (Cost <= 4)
   4710       return true;
   4711   }
   4712 
   4713   bool ReverseVEXT;
   4714   unsigned Imm, WhichResult;
   4715 
   4716   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   4717   return (EltSize >= 32 ||
   4718           ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
   4719           isVREVMask(M, VT, 64) ||
   4720           isVREVMask(M, VT, 32) ||
   4721           isVREVMask(M, VT, 16) ||
   4722           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
   4723           isVTBLMask(M, VT) ||
   4724           isVTRNMask(M, VT, WhichResult) ||
   4725           isVUZPMask(M, VT, WhichResult) ||
   4726           isVZIPMask(M, VT, WhichResult) ||
   4727           isVTRN_v_undef_Mask(M, VT, WhichResult) ||
   4728           isVUZP_v_undef_Mask(M, VT, WhichResult) ||
   4729           isVZIP_v_undef_Mask(M, VT, WhichResult) ||
   4730           ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
   4731 }
   4732 
   4733 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
   4734 /// the specified operations to build the shuffle.
   4735 static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
   4736                                       SDValue RHS, SelectionDAG &DAG,
   4737                                       DebugLoc dl) {
   4738   unsigned OpNum = (PFEntry >> 26) & 0x0F;
   4739   unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
   4740   unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
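           // Descriptive note (not from the original source): bits 31-30 of PFEntry
           // hold the cost (checked by the callers), bits 29-26 the opcode decoded
           // above, and bits 25-13 / 12-0 the table entries for the two operands.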
   4741 
   4742   enum {
   4743     OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
   4744     OP_VREV,
   4745     OP_VDUP0,
   4746     OP_VDUP1,
   4747     OP_VDUP2,
   4748     OP_VDUP3,
   4749     OP_VEXT1,
   4750     OP_VEXT2,
   4751     OP_VEXT3,
   4752     OP_VUZPL, // VUZP, left result
   4753     OP_VUZPR, // VUZP, right result
   4754     OP_VZIPL, // VZIP, left result
   4755     OP_VZIPR, // VZIP, right result
   4756     OP_VTRNL, // VTRN, left result
   4757     OP_VTRNR  // VTRN, right result
   4758   };
   4759 
   4760   if (OpNum == OP_COPY) {
   4761     if (LHSID == (1*9+2)*9+3) return LHS;
   4762     assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
   4763     return RHS;
   4764   }
   4765 
   4766   SDValue OpLHS, OpRHS;
   4767   OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
   4768   OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
   4769   EVT VT = OpLHS.getValueType();
   4770 
   4771   switch (OpNum) {
   4772   default: llvm_unreachable("Unknown shuffle opcode!");
   4773   case OP_VREV:
   4774     // VREV divides the vector in half and swaps within the half.
   4775     if (VT.getVectorElementType() == MVT::i32 ||
   4776         VT.getVectorElementType() == MVT::f32)
   4777       return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
   4778     // vrev <4 x i16> -> VREV32
   4779     if (VT.getVectorElementType() == MVT::i16)
   4780       return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
   4781     // vrev <4 x i8> -> VREV16
   4782     assert(VT.getVectorElementType() == MVT::i8);
   4783     return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
   4784   case OP_VDUP0:
   4785   case OP_VDUP1:
   4786   case OP_VDUP2:
   4787   case OP_VDUP3:
   4788     return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
   4789                        OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32));
   4790   case OP_VEXT1:
   4791   case OP_VEXT2:
   4792   case OP_VEXT3:
   4793     return DAG.getNode(ARMISD::VEXT, dl, VT,
   4794                        OpLHS, OpRHS,
   4795                        DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32));
   4796   case OP_VUZPL:
   4797   case OP_VUZPR:
   4798     return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
   4799                        OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
   4800   case OP_VZIPL:
   4801   case OP_VZIPR:
   4802     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
   4803                        OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
   4804   case OP_VTRNL:
   4805   case OP_VTRNR:
   4806     return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
   4807                        OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
   4808   }
   4809 }
   4810 
   4811 static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
   4812                                        ArrayRef<int> ShuffleMask,
   4813                                        SelectionDAG &DAG) {
   4814   // Check to see if we can use the VTBL instruction.
   4815   SDValue V1 = Op.getOperand(0);
   4816   SDValue V2 = Op.getOperand(1);
   4817   DebugLoc DL = Op.getDebugLoc();
   4818 
   4819   SmallVector<SDValue, 8> VTBLMask;
   4820   for (ArrayRef<int>::iterator
   4821          I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
   4822     VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
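           // Descriptive note (not original text): VTBL treats each mask entry as a
           // byte index into the concatenated table registers, so an entry of 9
           // selects byte 1 of V2 in the two-register form; out-of-range entries
           // produce zero.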
   4823 
   4824   if (V2.getNode()->getOpcode() == ISD::UNDEF)
   4825     return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
   4826                        DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
   4827                                    &VTBLMask[0], 8));
   4828 
   4829   return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
   4830                      DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
   4831                                  &VTBLMask[0], 8));
   4832 }
   4833 
   4834 static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
   4835                                                       SelectionDAG &DAG) {
   4836   DebugLoc DL = Op.getDebugLoc();
   4837   SDValue OpLHS = Op.getOperand(0);
   4838   EVT VT = OpLHS.getValueType();
   4839 
   4840   assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
    4841          "Expect a v8i16/v16i8 type");
   4842   OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
    4843   // For a v16i8 type: After the VREV, we have <7, ..., 0, 15, ..., 8>. Now,
   4844   // extract the first 8 bytes into the top double word and the last 8 bytes
   4845   // into the bottom double word. The v8i16 case is similar.
   4846   unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
   4847   return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
   4848                      DAG.getConstant(ExtractNum, MVT::i32));
   4849 }
   4850 
   4851 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   4852   SDValue V1 = Op.getOperand(0);
   4853   SDValue V2 = Op.getOperand(1);
   4854   DebugLoc dl = Op.getDebugLoc();
   4855   EVT VT = Op.getValueType();
   4856   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
   4857 
   4858   // Convert shuffles that are directly supported on NEON to target-specific
   4859   // DAG nodes, instead of keeping them as shuffles and matching them again
   4860   // during code selection.  This is more efficient and avoids the possibility
   4861   // of inconsistencies between legalization and selection.
   4862   // FIXME: floating-point vectors should be canonicalized to integer vectors
    4863   // of the same size so that they get CSEd properly.
   4864   ArrayRef<int> ShuffleMask = SVN->getMask();
   4865 
   4866   unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   4867   if (EltSize <= 32) {
   4868     if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
   4869       int Lane = SVN->getSplatIndex();
    4870       // If this is an undef splat, generate it via "just" vdup, if possible.
   4871       if (Lane == -1) Lane = 0;
   4872 
   4873       // Test if V1 is a SCALAR_TO_VECTOR.
   4874       if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   4875         return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
   4876       }
   4877       // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
   4878       // (and probably will turn into a SCALAR_TO_VECTOR once legalization
   4879       // reaches it).
   4880       if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
   4881           !isa<ConstantSDNode>(V1.getOperand(0))) {
   4882         bool IsScalarToVector = true;
   4883         for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
   4884           if (V1.getOperand(i).getOpcode() != ISD::UNDEF) {
   4885             IsScalarToVector = false;
   4886             break;
   4887           }
   4888         if (IsScalarToVector)
   4889           return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
   4890       }
   4891       return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
   4892                          DAG.getConstant(Lane, MVT::i32));
   4893     }
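             // For instance (illustrative only), a v4i32 shuffle with mask
             // <1, 1, 1, 1> takes the splat path above and becomes VDUPLANE(V1, 1).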
   4894 
   4895     bool ReverseVEXT;
   4896     unsigned Imm;
   4897     if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
   4898       if (ReverseVEXT)
   4899         std::swap(V1, V2);
   4900       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
   4901                          DAG.getConstant(Imm, MVT::i32));
   4902     }
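             // E.g. (illustrative) a v8i8 mask of <2,3,4,5,6,7,8,9> spans both
             // inputs and is matched above, yielding VEXT(V1, V2, #2).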
   4903 
   4904     if (isVREVMask(ShuffleMask, VT, 64))
   4905       return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
   4906     if (isVREVMask(ShuffleMask, VT, 32))
   4907       return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
   4908     if (isVREVMask(ShuffleMask, VT, 16))
   4909       return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
   4910 
   4911     if (V2->getOpcode() == ISD::UNDEF &&
   4912         isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
   4913       return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
   4914                          DAG.getConstant(Imm, MVT::i32));
   4915     }
   4916 
   4917     // Check for Neon shuffles that modify both input vectors in place.
   4918     // If both results are used, i.e., if there are two shuffles with the same
   4919     // source operands and with masks corresponding to both results of one of
   4920     // these operations, DAG memoization will ensure that a single node is
   4921     // used for both shuffles.
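             // Illustrative example: for v4i16, a mask of <0, 4, 2, 6> is a VTRN
             // mask selecting the first result, so WhichResult comes back as 0.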
   4922     unsigned WhichResult;
   4923     if (isVTRNMask(ShuffleMask, VT, WhichResult))
   4924       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
   4925                          V1, V2).getValue(WhichResult);
   4926     if (isVUZPMask(ShuffleMask, VT, WhichResult))
   4927       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
   4928                          V1, V2).getValue(WhichResult);
   4929     if (isVZIPMask(ShuffleMask, VT, WhichResult))
   4930       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
   4931                          V1, V2).getValue(WhichResult);
   4932 
   4933     if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
   4934       return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
   4935                          V1, V1).getValue(WhichResult);
   4936     if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
   4937       return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
   4938                          V1, V1).getValue(WhichResult);
   4939     if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
   4940       return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
   4941                          V1, V1).getValue(WhichResult);
   4942   }
   4943 
   4944   // If the shuffle is not directly supported and it has 4 elements, use
   4945   // the PerfectShuffle-generated table to synthesize it from other shuffles.
   4946   unsigned NumElts = VT.getVectorNumElements();
   4947   if (NumElts == 4) {
   4948     unsigned PFIndexes[4];
   4949     for (unsigned i = 0; i != 4; ++i) {
   4950       if (ShuffleMask[i] < 0)
   4951         PFIndexes[i] = 8;
   4952       else
   4953         PFIndexes[i] = ShuffleMask[i];
   4954     }
   4955 
   4956     // Compute the index in the perfect shuffle table.
   4957     unsigned PFTableIndex =
   4958       PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
   4959     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
   4960     unsigned Cost = (PFEntry >> 30);
   4961 
   4962     if (Cost <= 4)
   4963       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
   4964   }
   4965 
   4966   // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
   4967   if (EltSize >= 32) {
   4968     // Do the expansion with floating-point types, since that is what the VFP
   4969     // registers are defined to use, and since i64 is not legal.
   4970     EVT EltVT = EVT::getFloatingPointVT(EltSize);
   4971     EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
   4972     V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
   4973     V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
   4974     SmallVector<SDValue, 8> Ops;
   4975     for (unsigned i = 0; i < NumElts; ++i) {
   4976       if (ShuffleMask[i] < 0)
   4977         Ops.push_back(DAG.getUNDEF(EltVT));
   4978       else
   4979         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
   4980                                   ShuffleMask[i] < (int)NumElts ? V1 : V2,
   4981                                   DAG.getConstant(ShuffleMask[i] & (NumElts-1),
   4982                                                   MVT::i32)));
   4983     }
   4984     SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts);
   4985     return DAG.getNode(ISD::BITCAST, dl, VT, Val);
   4986   }
   4987 
   4988   if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
   4989     return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
   4990 
   4991   if (VT == MVT::v8i8) {
   4992     SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG);
   4993     if (NewOp.getNode())
   4994       return NewOp;
   4995   }
   4996 
   4997   return SDValue();
   4998 }
   4999 
   5000 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   5001   // INSERT_VECTOR_ELT is legal only for immediate indexes.
   5002   SDValue Lane = Op.getOperand(2);
   5003   if (!isa<ConstantSDNode>(Lane))
   5004     return SDValue();
   5005 
   5006   return Op;
   5007 }
   5008 
   5009 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
   5010   // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
   5011   SDValue Lane = Op.getOperand(1);
   5012   if (!isa<ConstantSDNode>(Lane))
   5013     return SDValue();
   5014 
   5015   SDValue Vec = Op.getOperand(0);
   5016   if (Op.getValueType() == MVT::i32 &&
   5017       Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
   5018     DebugLoc dl = Op.getDebugLoc();
   5019     return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
   5020   }
   5021 
   5022   return Op;
   5023 }
   5024 
   5025 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
   5026   // The only time a CONCAT_VECTORS operation can have legal types is when
   5027   // two 64-bit vectors are concatenated to a 128-bit vector.
   5028   assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
   5029          "unexpected CONCAT_VECTORS");
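           // Sketch of the lowering (descriptive, not original text): each defined
           // 64-bit operand is bitcast to f64 and inserted into a v2f64, which is
           // then bitcast back to the 128-bit result type.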
   5030   DebugLoc dl = Op.getDebugLoc();
   5031   SDValue Val = DAG.getUNDEF(MVT::v2f64);
   5032   SDValue Op0 = Op.getOperand(0);
   5033   SDValue Op1 = Op.getOperand(1);
   5034   if (Op0.getOpcode() != ISD::UNDEF)
   5035     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
   5036                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
   5037                       DAG.getIntPtrConstant(0));
   5038   if (Op1.getOpcode() != ISD::UNDEF)
   5039     Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
   5040                       DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
   5041                       DAG.getIntPtrConstant(1));
   5042   return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
   5043 }
   5044 
   5045 /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
   5046 /// element has been zero/sign-extended, depending on the isSigned parameter,
   5047 /// from an integer type half its size.
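         /// For instance (an illustrative note), a v4i16 BUILD_VECTOR whose
         /// constants all fit in 8 bits counts as extended from i8.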
   5048 static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
   5049                                    bool isSigned) {
   5050   // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
   5051   EVT VT = N->getValueType(0);
   5052   if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
   5053     SDNode *BVN = N->getOperand(0).getNode();
   5054     if (BVN->getValueType(0) != MVT::v4i32 ||
   5055         BVN->getOpcode() != ISD::BUILD_VECTOR)
   5056       return false;
   5057     unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
   5058     unsigned HiElt = 1 - LoElt;
   5059     ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
   5060     ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
   5061     ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
   5062     ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
   5063     if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
   5064       return false;
   5065     if (isSigned) {
   5066       if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
   5067           Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
   5068         return true;
   5069     } else {
   5070       if (Hi0->isNullValue() && Hi1->isNullValue())
   5071         return true;
   5072     }
   5073     return false;
   5074   }
   5075 
   5076   if (N->getOpcode() != ISD::BUILD_VECTOR)
   5077     return false;
   5078 
   5079   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   5080     SDNode *Elt = N->getOperand(i).getNode();
   5081     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
   5082       unsigned EltSize = VT.getVectorElementType().getSizeInBits();
   5083       unsigned HalfSize = EltSize / 2;
   5084       if (isSigned) {
   5085         if (!isIntN(HalfSize, C->getSExtValue()))
   5086           return false;
   5087       } else {
   5088         if (!isUIntN(HalfSize, C->getZExtValue()))
   5089           return false;
   5090       }
   5091       continue;
   5092     }
   5093     return false;
   5094   }
   5095 
   5096   return true;
   5097 }
   5098 
   5099 /// isSignExtended - Check if a node is a vector value that is sign-extended
   5100 /// or a constant BUILD_VECTOR with sign-extended elements.
   5101 static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
   5102   if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
   5103     return true;
   5104   if (isExtendedBUILD_VECTOR(N, DAG, true))
   5105     return true;
   5106   return false;
   5107 }
   5108 
   5109 /// isZeroExtended - Check if a node is a vector value that is zero-extended
   5110 /// or a constant BUILD_VECTOR with zero-extended elements.
   5111 static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
   5112   if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
   5113     return true;
   5114   if (isExtendedBUILD_VECTOR(N, DAG, false))
   5115     return true;
   5116   return false;
   5117 }
   5118 
   5119 /// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
   5120 /// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
   5121 /// We insert the required extension here to get the vector to fill a D register.
   5122 static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
   5123                                             const EVT &OrigTy,
   5124                                             const EVT &ExtTy,
   5125                                             unsigned ExtOpcode) {
   5126   // The vector originally had a size of OrigTy. It was then extended to ExtTy.
   5127   // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
   5128   // 64-bits we need to insert a new extension so that it will be 64-bits.
   5129   assert(ExtTy.is128BitVector() && "Unexpected extension size");
   5130   if (OrigTy.getSizeInBits() >= 64)
   5131     return N;
   5132 
   5133   // Must extend size to at least 64 bits to be used as an operand for VMULL.
   5134   MVT::SimpleValueType OrigSimpleTy = OrigTy.getSimpleVT().SimpleTy;
   5135   EVT NewVT;
   5136   switch (OrigSimpleTy) {
   5137   default: llvm_unreachable("Unexpected Orig Vector Type");
   5138   case MVT::v2i8:
   5139   case MVT::v2i16:
   5140     NewVT = MVT::v2i32;
   5141     break;
   5142   case MVT::v4i8:
   5143     NewVT = MVT::v4i16;
   5144     break;
   5145   }
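           // E.g. (illustrative) a v4i8 input is widened to v4i16 by the ExtOpcode
           // node below, so the value fills a 64-bit D register for VMULL.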
   5146   return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N);
   5147 }
   5148 
   5149 /// SkipLoadExtensionForVMULL - return a load of the original vector size that
   5150 /// does not do any sign/zero extension. If the original vector is less
   5151 /// than 64 bits, an appropriate extension will be added after the load to
   5152 /// reach a total size of 64 bits. We have to add the extension separately
   5153 /// because ARM does not have a sign/zero extending load for vectors.
   5154 static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
   5155   SDValue NonExtendingLoad =
   5156     DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(),
   5157                 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(),
   5158                 LD->isNonTemporal(), LD->isInvariant(),
   5159                 LD->getAlignment());
   5160   unsigned ExtOp = 0;
   5161   switch (LD->getExtensionType()) {
   5162   default: llvm_unreachable("Unexpected LoadExtType");
   5163   case ISD::EXTLOAD:
   5164   case ISD::SEXTLOAD: ExtOp = ISD::SIGN_EXTEND; break;
   5165   case ISD::ZEXTLOAD: ExtOp = ISD::ZERO_EXTEND; break;
   5166   }
   5167   MVT::SimpleValueType MemType = LD->getMemoryVT().getSimpleVT().SimpleTy;
   5168   MVT::SimpleValueType ExtType = LD->getValueType(0).getSimpleVT().SimpleTy;
   5169   return AddRequiredExtensionForVMULL(NonExtendingLoad, DAG,
   5170                                       MemType, ExtType, ExtOp);
   5171 }
   5172 
   5173 /// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
   5174 /// extending load, or BUILD_VECTOR with extended elements, return the
   5175 /// unextended value. The unextended vector should be 64 bits so that it can
   5176 /// be used as an operand to a VMULL instruction. If the original vector size
    5177 /// before extension is less than 64 bits we add an extension to resize
   5178 /// the vector to 64 bits.
   5179 static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
   5180   if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
   5181     return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
   5182                                         N->getOperand(0)->getValueType(0),
   5183                                         N->getValueType(0),
   5184                                         N->getOpcode());
   5185 
   5186   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
   5187     return SkipLoadExtensionForVMULL(LD, DAG);
   5188 
   5189   // Otherwise, the value must be a BUILD_VECTOR.  For v2i64, it will
   5190   // have been legalized as a BITCAST from v4i32.
   5191   if (N->getOpcode() == ISD::BITCAST) {
   5192     SDNode *BVN = N->getOperand(0).getNode();
   5193     assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
   5194            BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
   5195     unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0;
   5196     return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32,
   5197                        BVN->getOperand(LowElt), BVN->getOperand(LowElt+2));
   5198   }
   5199   // Construct a new BUILD_VECTOR with elements truncated to half the size.
   5200   assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
   5201   EVT VT = N->getValueType(0);
   5202   unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
   5203   unsigned NumElts = VT.getVectorNumElements();
   5204   MVT TruncVT = MVT::getIntegerVT(EltSize);
   5205   SmallVector<SDValue, 8> Ops;
   5206   for (unsigned i = 0; i != NumElts; ++i) {
   5207     ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
   5208     const APInt &CInt = C->getAPIntValue();
   5209     // Element types smaller than 32 bits are not legal, so use i32 elements.
   5210     // The values are implicitly truncated so sext vs. zext doesn't matter.
   5211     Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32));
   5212   }
   5213   return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
   5214                      MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts);
   5215 }
   5216 
   5217 static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
   5218   unsigned Opcode = N->getOpcode();
   5219   if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
   5220     SDNode *N0 = N->getOperand(0).getNode();
   5221     SDNode *N1 = N->getOperand(1).getNode();
   5222     return N0->hasOneUse() && N1->hasOneUse() &&
   5223       isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
   5224   }
   5225   return false;
   5226 }
   5227 
   5228 static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
   5229   unsigned Opcode = N->getOpcode();
   5230   if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
   5231     SDNode *N0 = N->getOperand(0).getNode();
   5232     SDNode *N1 = N->getOperand(1).getNode();
   5233     return N0->hasOneUse() && N1->hasOneUse() &&
   5234       isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
   5235   }
   5236   return false;
   5237 }
   5238 
   5239 static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
   5240   // Multiplications are only custom-lowered for 128-bit vectors so that
   5241   // VMULL can be detected.  Otherwise v2i64 multiplications are not legal.
   5242   EVT VT = Op.getValueType();
   5243   assert(VT.is128BitVector() && VT.isInteger() &&
   5244          "unexpected type for custom-lowering ISD::MUL");
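           // A simple illustrative case (not from the original comments):
           // (mul (sext v4i16 A to v4i32), (sext v4i16 B to v4i32)) is matched
           // below and becomes VMULLs A, B producing the v4i32 result directly.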
   5245   SDNode *N0 = Op.getOperand(0).getNode();
   5246   SDNode *N1 = Op.getOperand(1).getNode();
   5247   unsigned NewOpc = 0;
   5248   bool isMLA = false;
   5249   bool isN0SExt = isSignExtended(N0, DAG);
   5250   bool isN1SExt = isSignExtended(N1, DAG);
   5251   if (isN0SExt && isN1SExt)
   5252     NewOpc = ARMISD::VMULLs;
   5253   else {
   5254     bool isN0ZExt = isZeroExtended(N0, DAG);
   5255     bool isN1ZExt = isZeroExtended(N1, DAG);
   5256     if (isN0ZExt && isN1ZExt)
   5257       NewOpc = ARMISD::VMULLu;
   5258     else if (isN1SExt || isN1ZExt) {
   5259       // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
   5260       // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
   5261       if (isN1SExt && isAddSubSExt(N0, DAG)) {
   5262         NewOpc = ARMISD::VMULLs;
   5263         isMLA = true;
   5264       } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
   5265         NewOpc = ARMISD::VMULLu;
   5266         isMLA = true;
   5267       } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
   5268         std::swap(N0, N1);
   5269         NewOpc = ARMISD::VMULLu;
   5270         isMLA = true;
   5271       }
   5272     }
   5273 
   5274     if (!NewOpc) {
   5275       if (VT == MVT::v2i64)
   5276         // Fall through to expand this.  It is not legal.
   5277         return SDValue();
   5278       else
   5279         // Other vector multiplications are legal.
   5280         return Op;
   5281     }
   5282   }
   5283 
   5284   // Legalize to a VMULL instruction.
   5285   DebugLoc DL = Op.getDebugLoc();
   5286   SDValue Op0;
   5287   SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
   5288   if (!isMLA) {
   5289     Op0 = SkipExtensionForVMULL(N0, DAG);
   5290     assert(Op0.getValueType().is64BitVector() &&
   5291            Op1.getValueType().is64BitVector() &&
   5292            "unexpected types for extended operands to VMULL");
   5293     return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
   5294   }
   5295 
   5296   // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
   5297   // isel lowering to take advantage of no-stall back to back vmul + vmla.
   5298   //   vmull q0, d4, d6
   5299   //   vmlal q0, d5, d6
   5300   // is faster than
   5301   //   vaddl q0, d4, d5
   5302   //   vmovl q1, d6
   5303   //   vmul  q0, q0, q1
   5304   SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
   5305   SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
   5306   EVT Op1VT = Op1.getValueType();
   5307   return DAG.getNode(N0->getOpcode(), DL, VT,
   5308                      DAG.getNode(NewOpc, DL, VT,
   5309                                DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
   5310                      DAG.getNode(NewOpc, DL, VT,
   5311                                DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
   5312 }
   5313 
   5314 static SDValue
   5315 LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) {
   5316   // Convert to float
   5317   // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
   5318   // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
   5319   X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
   5320   Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
   5321   X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
   5322   Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
   5323   // Get reciprocal estimate.
   5324   // float4 recip = vrecpeq_f32(yf);
   5325   Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5326                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y);
   5327   // Because char has a smaller range than uchar, we can actually get away
    5328   // without any Newton steps.  This requires that we use a weird bias
   5329   // of 0xb000, however (again, this has been exhaustively tested).
   5330   // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
   5331   X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
   5332   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
   5333   Y = DAG.getConstant(0xb000, MVT::i32);
   5334   Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y);
   5335   X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
   5336   X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
   5337   // Convert back to short.
   5338   X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
   5339   X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
   5340   return X;
   5341 }
   5342 
   5343 static SDValue
   5344 LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) {
   5345   SDValue N2;
   5346   // Convert to float.
   5347   // float4 yf = vcvt_f32_s32(vmovl_s16(y));
   5348   // float4 xf = vcvt_f32_s32(vmovl_s16(x));
   5349   N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
   5350   N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
   5351   N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
   5352   N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
   5353 
   5354   // Use reciprocal estimate and one refinement step.
   5355   // float4 recip = vrecpeq_f32(yf);
   5356   // recip *= vrecpsq_f32(yf, recip);
   5357   N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5358                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
   5359   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5360                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
   5361                    N1, N2);
   5362   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
   5363   // Because short has a smaller range than ushort, we can actually get away
    5364   // with only a single Newton step.  This requires that we use a weird bias
    5365   // of 0x89, however (again, this has been exhaustively tested).
   5366   // float4 result = as_float4(as_int4(xf*recip) + 0x89);
   5367   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
   5368   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
   5369   N1 = DAG.getConstant(0x89, MVT::i32);
   5370   N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
   5371   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
   5372   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
   5373   // Convert back to integer and return.
   5374   // return vmovn_s32(vcvt_s32_f32(result));
   5375   N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
   5376   N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
   5377   return N0;
   5378 }
   5379 
   5380 static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
   5381   EVT VT = Op.getValueType();
   5382   assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
   5383          "unexpected type for custom-lowering ISD::SDIV");
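           // Outline (descriptive note, not original text): v8i8 is sign-extended
           // to v8i16, split into two v4i16 halves that each go through
           // LowerSDIV_v4i8, and the concatenated result is truncated back to
           // v8i8; v4i16 goes straight to LowerSDIV_v4i16.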
   5384 
   5385   DebugLoc dl = Op.getDebugLoc();
   5386   SDValue N0 = Op.getOperand(0);
   5387   SDValue N1 = Op.getOperand(1);
   5388   SDValue N2, N3;
   5389 
   5390   if (VT == MVT::v8i8) {
   5391     N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
   5392     N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
   5393 
   5394     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5395                      DAG.getIntPtrConstant(4));
   5396     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5397                      DAG.getIntPtrConstant(4));
   5398     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5399                      DAG.getIntPtrConstant(0));
   5400     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5401                      DAG.getIntPtrConstant(0));
   5402 
   5403     N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
   5404     N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
   5405 
   5406     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
   5407     N0 = LowerCONCAT_VECTORS(N0, DAG);
   5408 
   5409     N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
   5410     return N0;
   5411   }
   5412   return LowerSDIV_v4i16(N0, N1, dl, DAG);
   5413 }
   5414 
   5415 static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
   5416   EVT VT = Op.getValueType();
   5417   assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
   5418          "unexpected type for custom-lowering ISD::UDIV");
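           // Outline (descriptive note, not original text): v8i8 is zero-extended
           // to v8i16, the two v4i16 halves are divided with LowerSDIV_v4i16 (the
           // zero-extended byte values fit comfortably in its signed range), and
           // the result is narrowed with the vqmovnsu intrinsic.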
   5419 
   5420   DebugLoc dl = Op.getDebugLoc();
   5421   SDValue N0 = Op.getOperand(0);
   5422   SDValue N1 = Op.getOperand(1);
   5423   SDValue N2, N3;
   5424 
   5425   if (VT == MVT::v8i8) {
   5426     N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
   5427     N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
   5428 
   5429     N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5430                      DAG.getIntPtrConstant(4));
   5431     N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5432                      DAG.getIntPtrConstant(4));
   5433     N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
   5434                      DAG.getIntPtrConstant(0));
   5435     N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
   5436                      DAG.getIntPtrConstant(0));
   5437 
   5438     N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
   5439     N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
   5440 
   5441     N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
   5442     N0 = LowerCONCAT_VECTORS(N0, DAG);
   5443 
   5444     N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
   5445                      DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
   5446                      N0);
   5447     return N0;
   5448   }
   5449 
    5450   // v4i16 udiv ... Convert to float.
   5451   // float4 yf = vcvt_f32_s32(vmovl_u16(y));
   5452   // float4 xf = vcvt_f32_s32(vmovl_u16(x));
   5453   N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
   5454   N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
   5455   N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
   5456   SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
   5457 
   5458   // Use reciprocal estimate and two refinement steps.
   5459   // float4 recip = vrecpeq_f32(yf);
   5460   // recip *= vrecpsq_f32(yf, recip);
   5461   // recip *= vrecpsq_f32(yf, recip);
   5462   N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5463                    DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1);
   5464   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5465                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
   5466                    BN1, N2);
   5467   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
   5468   N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
   5469                    DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
   5470                    BN1, N2);
   5471   N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
   5472   // Simply multiplying by the reciprocal estimate can leave us a few ulps
   5473   // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
   5474   // and that it will never cause us to return an answer too large).
   5475   // float4 result = as_float4(as_int4(xf*recip) + 2);
   5476   N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
   5477   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
   5478   N1 = DAG.getConstant(2, MVT::i32);
   5479   N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
   5480   N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
   5481   N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
   5482   // Convert back to integer and return.
   5483   // return vmovn_u32(vcvt_s32_f32(result));
   5484   N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
   5485   N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
   5486   return N0;
   5487 }
   5488 
   5489 static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
   5490   EVT VT = Op.getNode()->getValueType(0);
   5491   SDVTList VTs = DAG.getVTList(VT, MVT::i32);
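           // For context (descriptive note, not from the original): legalizing an
           // i64 add or sub yields ADDC/ADDE (or SUBC/SUBE) pairs; each maps onto
           // the ARM node chosen below, with the extra MVT::i32 result modeling
           // the carry flag.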
   5492 
   5493   unsigned Opc;
   5494   bool ExtraOp = false;
   5495   switch (Op.getOpcode()) {
   5496   default: llvm_unreachable("Invalid code");
   5497   case ISD::ADDC: Opc = ARMISD::ADDC; break;
   5498   case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
   5499   case ISD::SUBC: Opc = ARMISD::SUBC; break;
   5500   case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
   5501   }
   5502 
   5503   if (!ExtraOp)
   5504     return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
   5505                        Op.getOperand(1));
   5506   return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0),
   5507                      Op.getOperand(1), Op.getOperand(2));
   5508 }
   5509 
   5510 static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
   5511   // Monotonic load/store is legal for all targets
   5512   if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic)
   5513     return Op;
   5514 
    5515   // Acquire/Release load/store is not legal for targets without a
   5516   // dmb or equivalent available.
   5517   return SDValue();
   5518 }
   5519 
   5520 
   5521 static void
   5522 ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results,
   5523                     SelectionDAG &DAG, unsigned NewOp) {
   5524   DebugLoc dl = Node->getDebugLoc();
   5525   assert (Node->getValueType(0) == MVT::i64 &&
   5526           "Only know how to expand i64 atomics");
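           // Sketch of the expansion (descriptive, not original text): the 64-bit
           // operands are split into i32 halves feeding a target ATOM*64_DAG memory
           // intrinsic node, and its two i32 results are rejoined with BUILD_PAIR
           // (the third result is the chain).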
   5527 
   5528   SmallVector<SDValue, 6> Ops;
   5529   Ops.push_back(Node->getOperand(0)); // Chain
   5530   Ops.push_back(Node->getOperand(1)); // Ptr
   5531   // Low part of Val1
   5532   Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5533                             Node->getOperand(2), DAG.getIntPtrConstant(0)));
   5534   // High part of Val1
   5535   Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5536                             Node->getOperand(2), DAG.getIntPtrConstant(1)));
   5537   if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) {
    5538     // Low part of Val2
   5539     Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5540                               Node->getOperand(3), DAG.getIntPtrConstant(0)));
   5541     // High part of Val2
   5542     Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32,
   5543                               Node->getOperand(3), DAG.getIntPtrConstant(1)));
   5544   }
   5545   SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
   5546   SDValue Result =
   5547     DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64,
   5548                             cast<MemSDNode>(Node)->getMemOperand());
   5549   SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) };
   5550   Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2));
   5551   Results.push_back(Result.getValue(2));
   5552 }
   5553 
   5554 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   5555   switch (Op.getOpcode()) {
   5556   default: llvm_unreachable("Don't know how to custom lower this!");
   5557   case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
   5558   case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
   5559   case ISD::GlobalAddress:
   5560     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
   5561       LowerGlobalAddressELF(Op, DAG);
   5562   case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
   5563   case ISD::SELECT:        return LowerSELECT(Op, DAG);
   5564   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
   5565   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
   5566   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   5567   case ISD::VASTART:       return LowerVASTART(Op, DAG);
   5568   case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
   5569   case ISD::ATOMIC_FENCE:  return LowerATOMIC_FENCE(Op, DAG, Subtarget);
   5570   case ISD::PREFETCH:      return LowerPREFETCH(Op, DAG, Subtarget);
   5571   case ISD::SINT_TO_FP:
   5572   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
   5573   case ISD::FP_TO_SINT:
   5574   case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
   5575   case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
   5576   case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
   5577   case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
   5578   case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   5579   case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
   5580   case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
   5581   case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
   5582                                                                Subtarget);
   5583   case ISD::BITCAST:       return ExpandBITCAST(Op.getNode(), DAG);
   5584   case ISD::SHL:
   5585   case ISD::SRL:
   5586   case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
   5587   case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
   5588   case ISD::SRL_PARTS:
   5589   case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
   5590   case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
   5591   case ISD::CTPOP:         return LowerCTPOP(Op.getNode(), DAG, Subtarget);
   5592   case ISD::SETCC:         return LowerVSETCC(Op, DAG);
   5593   case ISD::ConstantFP:    return LowerConstantFP(Op, DAG, Subtarget);
   5594   case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
   5595   case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
   5596   case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
   5597   case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   5598   case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
   5599   case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
   5600   case ISD::MUL:           return LowerMUL(Op, DAG);
   5601   case ISD::SDIV:          return LowerSDIV(Op, DAG);
   5602   case ISD::UDIV:          return LowerUDIV(Op, DAG);
   5603   case ISD::ADDC:
   5604   case ISD::ADDE:
   5605   case ISD::SUBC:
   5606   case ISD::SUBE:          return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
   5607   case ISD::ATOMIC_LOAD:
   5608   case ISD::ATOMIC_STORE:  return LowerAtomicLoadStore(Op, DAG);
   5609   }
   5610 }
   5611 
   5612 /// ReplaceNodeResults - Replace the results of node with an illegal result
   5613 /// type with new values built out of custom code.
   5614 void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
   5615                                            SmallVectorImpl<SDValue>&Results,
   5616                                            SelectionDAG &DAG) const {
   5617   SDValue Res;
   5618   switch (N->getOpcode()) {
   5619   default:
   5620     llvm_unreachable("Don't know how to custom expand this!");
   5621   case ISD::BITCAST:
   5622     Res = ExpandBITCAST(N, DAG);
   5623     break;
   5624   case ISD::SRL:
   5625   case ISD::SRA:
   5626     Res = Expand64BitShift(N, DAG, Subtarget);
   5627     break;
   5628   case ISD::ATOMIC_LOAD_ADD:
   5629     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG);
   5630     return;
   5631   case ISD::ATOMIC_LOAD_AND:
   5632     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG);
   5633     return;
   5634   case ISD::ATOMIC_LOAD_NAND:
   5635     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG);
   5636     return;
   5637   case ISD::ATOMIC_LOAD_OR:
   5638     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG);
   5639     return;
   5640   case ISD::ATOMIC_LOAD_SUB:
   5641     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG);
   5642     return;
   5643   case ISD::ATOMIC_LOAD_XOR:
   5644     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG);
   5645     return;
   5646   case ISD::ATOMIC_SWAP:
   5647     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG);
   5648     return;
   5649   case ISD::ATOMIC_CMP_SWAP:
   5650     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG);
   5651     return;
   5652   case ISD::ATOMIC_LOAD_MIN:
   5653     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG);
   5654     return;
   5655   case ISD::ATOMIC_LOAD_UMIN:
   5656     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG);
   5657     return;
   5658   case ISD::ATOMIC_LOAD_MAX:
   5659     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG);
   5660     return;
   5661   case ISD::ATOMIC_LOAD_UMAX:
   5662     ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG);
   5663     return;
   5664   }
   5665   if (Res.getNode())
   5666     Results.push_back(Res);
   5667 }
   5668 
   5669 //===----------------------------------------------------------------------===//
   5670 //                           ARM Scheduler Hooks
   5671 //===----------------------------------------------------------------------===//
   5672 
   5673 MachineBasicBlock *
   5674 ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
   5675                                      MachineBasicBlock *BB,
   5676                                      unsigned Size) const {
   5677   unsigned dest    = MI->getOperand(0).getReg();
   5678   unsigned ptr     = MI->getOperand(1).getReg();
   5679   unsigned oldval  = MI->getOperand(2).getReg();
   5680   unsigned newval  = MI->getOperand(3).getReg();
   5681   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   5682   DebugLoc dl = MI->getDebugLoc();
   5683   bool isThumb2 = Subtarget->isThumb2();
   5684 
   5685   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   5686   unsigned scratch = MRI.createVirtualRegister(isThumb2 ?
   5687     (const TargetRegisterClass*)&ARM::rGPRRegClass :
   5688     (const TargetRegisterClass*)&ARM::GPRRegClass);
   5689 
   5690   if (isThumb2) {
   5691     MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
   5692     MRI.constrainRegClass(oldval, &ARM::rGPRRegClass);
   5693     MRI.constrainRegClass(newval, &ARM::rGPRRegClass);
   5694   }
   5695 
   5696   unsigned ldrOpc, strOpc;
   5697   switch (Size) {
   5698   default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
   5699   case 1:
   5700     ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
   5701     strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
   5702     break;
   5703   case 2:
   5704     ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
   5705     strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
   5706     break;
   5707   case 4:
   5708     ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
   5709     strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
   5710     break;
   5711   }
   5712 
   5713   MachineFunction *MF = BB->getParent();
   5714   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   5715   MachineFunction::iterator It = BB;
   5716   ++It; // insert the new blocks after the current block
   5717 
   5718   MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5719   MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5720   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   5721   MF->insert(It, loop1MBB);
   5722   MF->insert(It, loop2MBB);
   5723   MF->insert(It, exitMBB);
   5724 
   5725   // Transfer the remainder of BB and its successor edges to exitMBB.
   5726   exitMBB->splice(exitMBB->begin(), BB,
   5727                   llvm::next(MachineBasicBlock::iterator(MI)),
   5728                   BB->end());
   5729   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
   5730 
   5731   //  thisMBB:
   5732   //   ...
   5733   //   fallthrough --> loop1MBB
   5734   BB->addSuccessor(loop1MBB);
   5735 
   5736   // loop1MBB:
   5737   //   ldrex dest, [ptr]
   5738   //   cmp dest, oldval
   5739   //   bne exitMBB
   5740   BB = loop1MBB;
   5741   MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
   5742   if (ldrOpc == ARM::t2LDREX)
   5743     MIB.addImm(0);
   5744   AddDefaultPred(MIB);
   5745   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
   5746                  .addReg(dest).addReg(oldval));
   5747   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   5748     .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   5749   BB->addSuccessor(loop2MBB);
   5750   BB->addSuccessor(exitMBB);
   5751 
   5752   // loop2MBB:
   5753   //   strex scratch, newval, [ptr]
   5754   //   cmp scratch, #0
   5755   //   bne loop1MBB
   5756   BB = loop2MBB;
   5757   MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr);
   5758   if (strOpc == ARM::t2STREX)
   5759     MIB.addImm(0);
   5760   AddDefaultPred(MIB);
   5761   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
   5762                  .addReg(scratch).addImm(0));
   5763   BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
   5764     .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
   5765   BB->addSuccessor(loop1MBB);
   5766   BB->addSuccessor(exitMBB);
   5767 
   5768   //  exitMBB:
   5769   //   ...
   5770   BB = exitMBB;
   5771 
   5772   MI->eraseFromParent();   // The instruction is gone now.
   5773 
   5774   return BB;
   5775 }
   5776 
   5777 MachineBasicBlock *
   5778 ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   5779                                     unsigned Size, unsigned BinOpcode) const {
   5780   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
   5781   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   5782 
   5783   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   5784   MachineFunction *MF = BB->getParent();
   5785   MachineFunction::iterator It = BB;
   5786   ++It;
   5787 
   5788   unsigned dest = MI->getOperand(0).getReg();
   5789   unsigned ptr = MI->getOperand(1).getReg();
   5790   unsigned incr = MI->getOperand(2).getReg();
   5791   DebugLoc dl = MI->getDebugLoc();
   5792   bool isThumb2 = Subtarget->isThumb2();
   5793 
   5794   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
   5795   if (isThumb2) {
   5796     MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
   5797     MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
   5798   }
   5799 
   5800   unsigned ldrOpc, strOpc;
   5801   switch (Size) {
    5802   default: llvm_unreachable("unsupported size for AtomicBinary!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
    break;
  case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
    break;
  case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
    break;
  }

  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  const TargetRegisterClass *TRC = isThumb2 ?
    (const TargetRegisterClass*)&ARM::rGPRRegClass :
    (const TargetRegisterClass*)&ARM::GPRRegClass;
  unsigned scratch = MRI.createVirtualRegister(TRC);
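  // For ATOMIC_SWAP (BinOpcode == 0) there is no arithmetic to perform, so
  // the incoming value is stored back directly and no second scratch
  // register is needed.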
  unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   ldrex dest, [ptr]
  //   <binop> scratch2, dest, incr
  //   strex scratch, scratch2, [ptr]
  //   cmp scratch, #0
  //   bne loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
  if (ldrOpc == ARM::t2LDREX)
    MIB.addImm(0);
  AddDefaultPred(MIB);
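  // Apply the binary operation, if any.  The trailing addReg(0) fills in the
  // optional cc_out operand with NoRegister, so the data-processing
  // instruction does not write CPSR.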
  if (BinOpcode) {
    // Operand order needs to go the other way for NAND.
    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
                     addReg(incr).addReg(dest)).addReg(0);
    else
      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
                     addReg(dest).addReg(incr)).addReg(0);
  }

  MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr);
  if (strOpc == ARM::t2STREX)
    MIB.addImm(0);
  AddDefaultPred(MIB);
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                 .addReg(scratch).addImm(0));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;

  MI->eraseFromParent();   // The instruction is gone now.

  return BB;
}
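// Illustrative sketch only (register names are placeholders; the actual
// opcodes depend on Size, BinOpcode and ARM vs. Thumb-2 mode): called with,
// say, Size == 4 and an add opcode, the expansion above yields a single
// load/store-exclusive retry loop roughly of the form
//
//   loop:
//     ldrex   rDest, [rPtr]
//     add     rScratch2, rDest, rIncr    @ BinOpcode; omitted for ATOMIC_SWAP
//     strex   rScratch, rScratch2, [rPtr]
//     cmp     rScratch, #0
//     bne     loop
//   exit: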

MachineBasicBlock *
ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI,
                                          MachineBasicBlock *BB,
                                          unsigned Size,
                                          bool signExtend,
                                          ARMCC::CondCodes Cond) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *MF = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptr = MI->getOperand(1).getReg();
  unsigned incr = MI->getOperand(2).getReg();
  unsigned oldval = dest;
  DebugLoc dl = MI->getDebugLoc();
  bool isThumb2 = Subtarget->isThumb2();

  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
  if (isThumb2) {
    MRI.constrainRegClass(dest, &ARM::rGPRRegClass);
    MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
  }

  unsigned ldrOpc, strOpc, extendOpc;
  switch (Size) {
  default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB :