      1 //===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation -----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the interfaces that AArch64 uses to lower LLVM code into a
     11 // selection DAG.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #define DEBUG_TYPE "aarch64-isel"
     16 #include "AArch64.h"
     17 #include "AArch64ISelLowering.h"
     18 #include "AArch64MachineFunctionInfo.h"
     19 #include "AArch64TargetMachine.h"
     20 #include "AArch64TargetObjectFile.h"
     21 #include "Utils/AArch64BaseInfo.h"
     22 #include "llvm/CodeGen/Analysis.h"
     23 #include "llvm/CodeGen/CallingConvLower.h"
     24 #include "llvm/CodeGen/MachineFrameInfo.h"
     25 #include "llvm/CodeGen/MachineInstrBuilder.h"
     26 #include "llvm/CodeGen/MachineRegisterInfo.h"
     27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
     28 #include "llvm/IR/CallingConv.h"
     29 
     30 using namespace llvm;
     31 
     32 static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) {
     33   const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
     34 
     35   if (Subtarget->isTargetLinux())
     36     return new AArch64LinuxTargetObjectFile();
     37   if (Subtarget->isTargetELF())
     38     return new TargetLoweringObjectFileELF();
     39   llvm_unreachable("unknown subtarget type");
     40 }
     41 
     42 AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
     43   : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) {
     44 
     45   const AArch64Subtarget *Subtarget = &TM.getSubtarget<AArch64Subtarget>();
     46 
     47   // SIMD compares set the entire lane's bits to 1
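           // (e.g. a NEON compare such as CMEQ on v4i32 writes all-ones into
           // each lane where the comparison holds and all-zeros elsewhere,
           // matching the ZeroOrNegativeOneBooleanContent contract.)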
     48   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
     49 
     50   // Scalar register <-> type mapping
     51   addRegisterClass(MVT::i32, &AArch64::GPR32RegClass);
     52   addRegisterClass(MVT::i64, &AArch64::GPR64RegClass);
     53   addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
     54   addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
     55   addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
     56   addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
     57 
     58   if (Subtarget->hasNEON()) {
     59     // And the vectors
     60     addRegisterClass(MVT::v8i8, &AArch64::VPR64RegClass);
     61     addRegisterClass(MVT::v4i16, &AArch64::VPR64RegClass);
     62     addRegisterClass(MVT::v2i32, &AArch64::VPR64RegClass);
     63     addRegisterClass(MVT::v1i64, &AArch64::VPR64RegClass);
     64     addRegisterClass(MVT::v2f32, &AArch64::VPR64RegClass);
     65     addRegisterClass(MVT::v16i8, &AArch64::VPR128RegClass);
     66     addRegisterClass(MVT::v8i16, &AArch64::VPR128RegClass);
     67     addRegisterClass(MVT::v4i32, &AArch64::VPR128RegClass);
     68     addRegisterClass(MVT::v2i64, &AArch64::VPR128RegClass);
     69     addRegisterClass(MVT::v4f32, &AArch64::VPR128RegClass);
     70     addRegisterClass(MVT::v2f64, &AArch64::VPR128RegClass);
     71   }
     72 
     73   computeRegisterProperties();
     74 
     75   // We combine OR nodes for bitfield and NEON BSL operations.
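           // Sketch of the kind of pattern involved (illustrative only):
           //   (or (and x, 0x0000ffff), (and y, 0xffff0000))
           // i.e. an OR of two ANDs with complementary masks, which can become
           // a single bitfield insert on scalars or a BSL-style bitwise select
           // on vectors.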
     76   setTargetDAGCombine(ISD::OR);
     77 
     78   setTargetDAGCombine(ISD::AND);
     79   setTargetDAGCombine(ISD::SRA);
     80 
     81   // AArch64 does not have i1 loads, or much of anything for i1 really.
     82   setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
     83   setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
     84   setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
     85 
     86   setStackPointerRegisterToSaveRestore(AArch64::XSP);
     87   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
     88   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
     89   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
     90 
     91   // We'll lower globals to wrappers for selection.
     92   setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
     93   setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
     94 
     95   // A64 instructions have the comparison predicate attached to the user of the
     96   // result, but having a separate comparison is valuable for matching.
     97   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
     98   setOperationAction(ISD::BR_CC, MVT::i64, Custom);
     99   setOperationAction(ISD::BR_CC, MVT::f32, Custom);
    100   setOperationAction(ISD::BR_CC, MVT::f64, Custom);
    101 
    102   setOperationAction(ISD::SELECT, MVT::i32, Custom);
    103   setOperationAction(ISD::SELECT, MVT::i64, Custom);
    104   setOperationAction(ISD::SELECT, MVT::f32, Custom);
    105   setOperationAction(ISD::SELECT, MVT::f64, Custom);
    106 
    107   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
    108   setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
    109   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
    110   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
    111 
    112   setOperationAction(ISD::BRCOND, MVT::Other, Custom);
    113 
    114   setOperationAction(ISD::SETCC, MVT::i32, Custom);
    115   setOperationAction(ISD::SETCC, MVT::i64, Custom);
    116   setOperationAction(ISD::SETCC, MVT::f32, Custom);
    117   setOperationAction(ISD::SETCC, MVT::f64, Custom);
    118 
    119   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
    120   setOperationAction(ISD::JumpTable, MVT::i32, Custom);
    121   setOperationAction(ISD::JumpTable, MVT::i64, Custom);
    122 
    123   setOperationAction(ISD::VASTART, MVT::Other, Custom);
    124   setOperationAction(ISD::VACOPY, MVT::Other, Custom);
    125   setOperationAction(ISD::VAEND, MVT::Other, Expand);
    126   setOperationAction(ISD::VAARG, MVT::Other, Expand);
    127 
    128   setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
    129 
    130   setOperationAction(ISD::ROTL, MVT::i32, Expand);
    131   setOperationAction(ISD::ROTL, MVT::i64, Expand);
    132 
    133   setOperationAction(ISD::UREM, MVT::i32, Expand);
    134   setOperationAction(ISD::UREM, MVT::i64, Expand);
    135   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
    136   setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
    137 
    138   setOperationAction(ISD::SREM, MVT::i32, Expand);
    139   setOperationAction(ISD::SREM, MVT::i64, Expand);
    140   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
    141   setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
    142 
    143   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    144   setOperationAction(ISD::CTPOP, MVT::i64, Expand);
    145 
    146   // Legal floating-point operations.
    147   setOperationAction(ISD::FABS, MVT::f32, Legal);
    148   setOperationAction(ISD::FABS, MVT::f64, Legal);
    149 
    150   setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    151   setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    152 
    153   setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    154   setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    155 
    156   setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
    157   setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
    158 
    159   setOperationAction(ISD::FNEG, MVT::f32, Legal);
    160   setOperationAction(ISD::FNEG, MVT::f64, Legal);
    161 
    162   setOperationAction(ISD::FRINT, MVT::f32, Legal);
    163   setOperationAction(ISD::FRINT, MVT::f64, Legal);
    164 
    165   setOperationAction(ISD::FSQRT, MVT::f32, Legal);
    166   setOperationAction(ISD::FSQRT, MVT::f64, Legal);
    167 
    168   setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    169   setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    170 
    171   setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
    172   setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    173   setOperationAction(ISD::ConstantFP, MVT::f128, Legal);
    174 
    175   // Illegal floating-point operations.
    176   setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    177   setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    178 
    179   setOperationAction(ISD::FCOS, MVT::f32, Expand);
    180   setOperationAction(ISD::FCOS, MVT::f64, Expand);
    181 
    182   setOperationAction(ISD::FEXP, MVT::f32, Expand);
    183   setOperationAction(ISD::FEXP, MVT::f64, Expand);
    184 
    185   setOperationAction(ISD::FEXP2, MVT::f32, Expand);
    186   setOperationAction(ISD::FEXP2, MVT::f64, Expand);
    187 
    188   setOperationAction(ISD::FLOG, MVT::f32, Expand);
    189   setOperationAction(ISD::FLOG, MVT::f64, Expand);
    190 
    191   setOperationAction(ISD::FLOG2, MVT::f32, Expand);
    192   setOperationAction(ISD::FLOG2, MVT::f64, Expand);
    193 
    194   setOperationAction(ISD::FLOG10, MVT::f32, Expand);
    195   setOperationAction(ISD::FLOG10, MVT::f64, Expand);
    196 
    197   setOperationAction(ISD::FPOW, MVT::f32, Expand);
    198   setOperationAction(ISD::FPOW, MVT::f64, Expand);
    199 
    200   setOperationAction(ISD::FPOWI, MVT::f32, Expand);
    201   setOperationAction(ISD::FPOWI, MVT::f64, Expand);
    202 
    203   setOperationAction(ISD::FREM, MVT::f32, Expand);
    204   setOperationAction(ISD::FREM, MVT::f64, Expand);
    205 
    206   setOperationAction(ISD::FSIN, MVT::f32, Expand);
    207   setOperationAction(ISD::FSIN, MVT::f64, Expand);
    208 
    209   setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    210   setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    211 
     212   // Virtually no operations on f128 are legal, but LLVM can't expand them when
     213   // there's a valid register class, so we need custom operations in most cases.
    214   setOperationAction(ISD::FABS,       MVT::f128, Expand);
    215   setOperationAction(ISD::FADD,       MVT::f128, Custom);
    216   setOperationAction(ISD::FCOPYSIGN,  MVT::f128, Expand);
    217   setOperationAction(ISD::FCOS,       MVT::f128, Expand);
    218   setOperationAction(ISD::FDIV,       MVT::f128, Custom);
    219   setOperationAction(ISD::FMA,        MVT::f128, Expand);
    220   setOperationAction(ISD::FMUL,       MVT::f128, Custom);
    221   setOperationAction(ISD::FNEG,       MVT::f128, Expand);
    222   setOperationAction(ISD::FP_EXTEND,  MVT::f128, Expand);
    223   setOperationAction(ISD::FP_ROUND,   MVT::f128, Expand);
    224   setOperationAction(ISD::FPOW,       MVT::f128, Expand);
    225   setOperationAction(ISD::FREM,       MVT::f128, Expand);
    226   setOperationAction(ISD::FRINT,      MVT::f128, Expand);
    227   setOperationAction(ISD::FSIN,       MVT::f128, Expand);
    228   setOperationAction(ISD::FSINCOS,    MVT::f128, Expand);
    229   setOperationAction(ISD::FSQRT,      MVT::f128, Expand);
    230   setOperationAction(ISD::FSUB,       MVT::f128, Custom);
    231   setOperationAction(ISD::FTRUNC,     MVT::f128, Expand);
    232   setOperationAction(ISD::SETCC,      MVT::f128, Custom);
    233   setOperationAction(ISD::BR_CC,      MVT::f128, Custom);
    234   setOperationAction(ISD::SELECT,     MVT::f128, Expand);
    235   setOperationAction(ISD::SELECT_CC,  MVT::f128, Custom);
    236   setOperationAction(ISD::FP_EXTEND,  MVT::f128, Custom);
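           // For the arithmetic operations marked Custom above, the custom
           // lowering later in this file is expected to produce soft-float
           // library calls; e.g. an f128 FADD should end up as a call to
           // something like __addtf3 rather than any native instruction.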
    237 
    238   // Lowering for many of the conversions is actually specified by the non-f128
    239   // type. The LowerXXX function will be trivial when f128 isn't involved.
    240   setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    241   setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    242   setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
    243   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    244   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
    245   setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
    246   setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    247   setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    248   setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
    249   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
    250   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    251   setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
    252   setOperationAction(ISD::FP_ROUND,  MVT::f32, Custom);
    253   setOperationAction(ISD::FP_ROUND,  MVT::f64, Custom);
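           // Illustration of the note above: FP_TO_SINT from f64 to i32 needs
           // nothing special and stays a single native conversion, while the
           // same node with an f128 source is expected to become a library
           // call (something like __fixtfsi).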
    254 
     255   // This prevents LLVM from trying to compress double constants into a float
     256   // constant-pool entry and then loading from there. It's of doubtful benefit
     257   // for A64: we'd need an LDR followed by an FCVT, I believe.
    258   setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
    259   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
    260   setLoadExtAction(ISD::EXTLOAD, MVT::f16, Expand);
    261 
    262   setTruncStoreAction(MVT::f128, MVT::f64, Expand);
    263   setTruncStoreAction(MVT::f128, MVT::f32, Expand);
    264   setTruncStoreAction(MVT::f128, MVT::f16, Expand);
    265   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    266   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    267   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
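           // With these Expand actions, a truncating f64->f32 store is
           // legalized as an FP_ROUND followed by an ordinary f32 store, and
           // an extending f32->f64 load becomes an ordinary f32 load followed
           // by FP_EXTEND, rather than one combined memory operation.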
    268 
    269   setExceptionPointerRegister(AArch64::X0);
    270   setExceptionSelectorRegister(AArch64::X1);
    271 
    272   if (Subtarget->hasNEON()) {
    273     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom);
    274     setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    275     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom);
    276     setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    277     setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom);
    278     setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    279     setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom);
    280     setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
    281     setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom);
    282     setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
    283     setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
    284 
    285     setOperationAction(ISD::SETCC, MVT::v8i8, Custom);
    286     setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
    287     setOperationAction(ISD::SETCC, MVT::v4i16, Custom);
    288     setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
    289     setOperationAction(ISD::SETCC, MVT::v2i32, Custom);
    290     setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
    291     setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
    292     setOperationAction(ISD::SETCC, MVT::v2f32, Custom);
    293     setOperationAction(ISD::SETCC, MVT::v4f32, Custom);
    294     setOperationAction(ISD::SETCC, MVT::v2f64, Custom);
    295   }
    296 }
    297 
    298 EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
    299   // It's reasonably important that this value matches the "natural" legal
    300   // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself
    301   // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64).
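           // For example: a scalar comparison of two f64 values gets an i32
           // result here, while a comparison of v4f32 operands gets v4i32
           // (one all-zeros/all-ones lane per element).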
    302   if (!VT.isVector()) return MVT::i32;
    303   return VT.changeVectorElementTypeToInteger();
    304 }
    305 
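         // Selects the exclusive load/store opcodes for an atomic operation.
         // Usage sketch: for a 4-byte access Log2_32(4) == 2, so a monotonic
         // atomic picks LDXR_word/STXR_word while an acquire-release atomic
         // picks LDAXR_word/STLXR_word.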
    306 static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
    307                                   unsigned &LdrOpc,
    308                                   unsigned &StrOpc) {
    309   static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
    310                                        AArch64::LDXR_word, AArch64::LDXR_dword};
    311   static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
    312                                      AArch64::LDAXR_word, AArch64::LDAXR_dword};
    313   static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
    314                                        AArch64::STXR_word, AArch64::STXR_dword};
    315   static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword,
    316                                      AArch64::STLXR_word, AArch64::STLXR_dword};
    317 
    318   const unsigned *LoadOps, *StoreOps;
    319   if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    320     LoadOps = LoadAcqs;
    321   else
    322     LoadOps = LoadBares;
    323 
    324   if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
    325     StoreOps = StoreRels;
    326   else
    327     StoreOps = StoreBares;
    328 
    329   assert(isPowerOf2_32(Size) && Size <= 8 &&
    330          "unsupported size for atomic binary op!");
    331 
    332   LdrOpc = LoadOps[Log2_32(Size)];
    333   StrOpc = StoreOps[Log2_32(Size)];
    334 }
    335 
    336 MachineBasicBlock *
    337 AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
    338                                         unsigned Size,
    339                                         unsigned BinOpcode) const {
    340   // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
    341   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    342 
    343   const BasicBlock *LLVM_BB = BB->getBasicBlock();
    344   MachineFunction *MF = BB->getParent();
    345   MachineFunction::iterator It = BB;
    346   ++It;
    347 
    348   unsigned dest = MI->getOperand(0).getReg();
    349   unsigned ptr = MI->getOperand(1).getReg();
    350   unsigned incr = MI->getOperand(2).getReg();
    351   AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
    352   DebugLoc dl = MI->getDebugLoc();
    353 
    354   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
    355 
    356   unsigned ldrOpc, strOpc;
    357   getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
    358 
    359   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    360   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    361   MF->insert(It, loopMBB);
    362   MF->insert(It, exitMBB);
    363 
    364   // Transfer the remainder of BB and its successor edges to exitMBB.
    365   exitMBB->splice(exitMBB->begin(), BB,
    366                   llvm::next(MachineBasicBlock::iterator(MI)),
    367                   BB->end());
    368   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
    369 
    370   const TargetRegisterClass *TRC
    371     = Size == 8 ? &AArch64::GPR64RegClass : &AArch64::GPR32RegClass;
    372   unsigned scratch = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC);
    373 
    374   //  thisMBB:
    375   //   ...
    376   //   fallthrough --> loopMBB
    377   BB->addSuccessor(loopMBB);
    378 
    379   //  loopMBB:
    380   //   ldxr dest, ptr
    381   //   <binop> scratch, dest, incr
    382   //   stxr stxr_status, scratch, ptr
    383   //   cbnz stxr_status, loopMBB
    384   //   fallthrough --> exitMBB
    385   BB = loopMBB;
    386   BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
    387   if (BinOpcode) {
    388     // All arithmetic operations we'll be creating are designed to take an extra
    389     // shift or extend operand, which we can conveniently set to zero.
    390 
    391     // Operand order needs to go the other way for NAND.
    392     if (BinOpcode == AArch64::BICwww_lsl || BinOpcode == AArch64::BICxxx_lsl)
    393       BuildMI(BB, dl, TII->get(BinOpcode), scratch)
    394         .addReg(incr).addReg(dest).addImm(0);
    395     else
    396       BuildMI(BB, dl, TII->get(BinOpcode), scratch)
    397         .addReg(dest).addReg(incr).addImm(0);
    398   }
    399 
    400   // From the stxr, the register is GPR32; from the cmp it's GPR32wsp
    401   unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    402   MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
    403 
    404   BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(scratch).addReg(ptr);
    405   BuildMI(BB, dl, TII->get(AArch64::CBNZw))
    406     .addReg(stxr_status).addMBB(loopMBB);
    407 
    408   BB->addSuccessor(loopMBB);
    409   BB->addSuccessor(exitMBB);
    410 
    411   //  exitMBB:
    412   //   ...
    413   BB = exitMBB;
    414 
    415   MI->eraseFromParent();   // The instruction is gone now.
    416 
    417   return BB;
    418 }
    419 
    420 MachineBasicBlock *
    421 AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
    422                                               MachineBasicBlock *BB,
    423                                               unsigned Size,
    424                                               unsigned CmpOp,
    425                                               A64CC::CondCodes Cond) const {
    426   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    427 
    428   const BasicBlock *LLVM_BB = BB->getBasicBlock();
    429   MachineFunction *MF = BB->getParent();
    430   MachineFunction::iterator It = BB;
    431   ++It;
    432 
    433   unsigned dest = MI->getOperand(0).getReg();
    434   unsigned ptr = MI->getOperand(1).getReg();
    435   unsigned incr = MI->getOperand(2).getReg();
    436   AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
    437 
    438   unsigned oldval = dest;
    439   DebugLoc dl = MI->getDebugLoc();
    440 
    441   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
    442   const TargetRegisterClass *TRC, *TRCsp;
    443   if (Size == 8) {
    444     TRC = &AArch64::GPR64RegClass;
    445     TRCsp = &AArch64::GPR64xspRegClass;
    446   } else {
    447     TRC = &AArch64::GPR32RegClass;
    448     TRCsp = &AArch64::GPR32wspRegClass;
    449   }
    450 
    451   unsigned ldrOpc, strOpc;
    452   getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
    453 
    454   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    455   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    456   MF->insert(It, loopMBB);
    457   MF->insert(It, exitMBB);
    458 
    459   // Transfer the remainder of BB and its successor edges to exitMBB.
    460   exitMBB->splice(exitMBB->begin(), BB,
    461                   llvm::next(MachineBasicBlock::iterator(MI)),
    462                   BB->end());
    463   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
    464 
    465   unsigned scratch = MRI.createVirtualRegister(TRC);
    466   MRI.constrainRegClass(scratch, TRCsp);
    467 
    468   //  thisMBB:
    469   //   ...
    470   //   fallthrough --> loopMBB
    471   BB->addSuccessor(loopMBB);
    472 
    473   //  loopMBB:
    474   //   ldxr dest, ptr
    475   //   cmp incr, dest (, sign extend if necessary)
    476   //   csel scratch, dest, incr, cond
    477   //   stxr stxr_status, scratch, ptr
    478   //   cbnz stxr_status, loopMBB
    479   //   fallthrough --> exitMBB
    480   BB = loopMBB;
    481   BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
    482 
    483   // Build compare and cmov instructions.
    484   MRI.constrainRegClass(incr, TRCsp);
    485   BuildMI(BB, dl, TII->get(CmpOp))
    486     .addReg(incr).addReg(oldval).addImm(0);
    487 
    488   BuildMI(BB, dl, TII->get(Size == 8 ? AArch64::CSELxxxc : AArch64::CSELwwwc),
    489           scratch)
    490     .addReg(oldval).addReg(incr).addImm(Cond);
    491 
    492   unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    493   MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
    494 
    495   BuildMI(BB, dl, TII->get(strOpc), stxr_status)
    496     .addReg(scratch).addReg(ptr);
    497   BuildMI(BB, dl, TII->get(AArch64::CBNZw))
    498     .addReg(stxr_status).addMBB(loopMBB);
    499 
    500   BB->addSuccessor(loopMBB);
    501   BB->addSuccessor(exitMBB);
    502 
    503   //  exitMBB:
    504   //   ...
    505   BB = exitMBB;
    506 
    507   MI->eraseFromParent();   // The instruction is gone now.
    508 
    509   return BB;
    510 }
    511 
    512 MachineBasicBlock *
    513 AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
    514                                          MachineBasicBlock *BB,
    515                                          unsigned Size) const {
    516   unsigned dest    = MI->getOperand(0).getReg();
    517   unsigned ptr     = MI->getOperand(1).getReg();
    518   unsigned oldval  = MI->getOperand(2).getReg();
    519   unsigned newval  = MI->getOperand(3).getReg();
    520   AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
    521   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    522   DebugLoc dl = MI->getDebugLoc();
    523 
    524   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
    525   const TargetRegisterClass *TRCsp;
    526   TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
    527 
    528   unsigned ldrOpc, strOpc;
    529   getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
    530 
    531   MachineFunction *MF = BB->getParent();
    532   const BasicBlock *LLVM_BB = BB->getBasicBlock();
    533   MachineFunction::iterator It = BB;
    534   ++It; // insert the new blocks after the current block
    535 
    536   MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
    537   MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
    538   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    539   MF->insert(It, loop1MBB);
    540   MF->insert(It, loop2MBB);
    541   MF->insert(It, exitMBB);
    542 
    543   // Transfer the remainder of BB and its successor edges to exitMBB.
    544   exitMBB->splice(exitMBB->begin(), BB,
    545                   llvm::next(MachineBasicBlock::iterator(MI)),
    546                   BB->end());
    547   exitMBB->transferSuccessorsAndUpdatePHIs(BB);
    548 
    549   //  thisMBB:
    550   //   ...
    551   //   fallthrough --> loop1MBB
    552   BB->addSuccessor(loop1MBB);
    553 
    554   // loop1MBB:
    555   //   ldxr dest, [ptr]
    556   //   cmp dest, oldval
    557   //   b.ne exitMBB
    558   BB = loop1MBB;
    559   BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr);
    560 
    561   unsigned CmpOp = Size == 8 ? AArch64::CMPxx_lsl : AArch64::CMPww_lsl;
    562   MRI.constrainRegClass(dest, TRCsp);
    563   BuildMI(BB, dl, TII->get(CmpOp))
    564     .addReg(dest).addReg(oldval).addImm(0);
    565   BuildMI(BB, dl, TII->get(AArch64::Bcc))
    566     .addImm(A64CC::NE).addMBB(exitMBB);
    567   BB->addSuccessor(loop2MBB);
    568   BB->addSuccessor(exitMBB);
    569 
    570   // loop2MBB:
     571   //   stxr stxr_status, newval, [ptr]
    572   //   cbnz stxr_status, loop1MBB
    573   BB = loop2MBB;
    574   unsigned stxr_status = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
    575   MRI.constrainRegClass(stxr_status, &AArch64::GPR32wspRegClass);
    576 
    577   BuildMI(BB, dl, TII->get(strOpc), stxr_status).addReg(newval).addReg(ptr);
    578   BuildMI(BB, dl, TII->get(AArch64::CBNZw))
    579     .addReg(stxr_status).addMBB(loop1MBB);
    580   BB->addSuccessor(loop1MBB);
    581   BB->addSuccessor(exitMBB);
    582 
    583   //  exitMBB:
    584   //   ...
    585   BB = exitMBB;
    586 
    587   MI->eraseFromParent();   // The instruction is gone now.
    588 
    589   return BB;
    590 }
    591 
    592 MachineBasicBlock *
    593 AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI,
    594                                     MachineBasicBlock *MBB) const {
    595   // We materialise the F128CSEL pseudo-instruction using conditional branches
     596   // and loads, giving an instruction sequence like:
    597   //     str q0, [sp]
    598   //     b.ne IfTrue
    599   //     b Finish
    600   // IfTrue:
    601   //     str q1, [sp]
    602   // Finish:
    603   //     ldr q0, [sp]
    604   //
    605   // Using virtual registers would probably not be beneficial since COPY
    606   // instructions are expensive for f128 (there's no actual instruction to
    607   // implement them).
    608   //
    609   // An alternative would be to do an integer-CSEL on some address. E.g.:
    610   //     mov x0, sp
    611   //     add x1, sp, #16
    612   //     str q0, [x0]
    613   //     str q1, [x1]
    614   //     csel x0, x0, x1, ne
    615   //     ldr q0, [x0]
    616   //
    617   // It's unclear which approach is actually optimal.
    618   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    619   MachineFunction *MF = MBB->getParent();
    620   const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    621   DebugLoc DL = MI->getDebugLoc();
    622   MachineFunction::iterator It = MBB;
    623   ++It;
    624 
    625   unsigned DestReg = MI->getOperand(0).getReg();
    626   unsigned IfTrueReg = MI->getOperand(1).getReg();
    627   unsigned IfFalseReg = MI->getOperand(2).getReg();
    628   unsigned CondCode = MI->getOperand(3).getImm();
    629   bool NZCVKilled = MI->getOperand(4).isKill();
    630 
    631   MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
    632   MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
    633   MF->insert(It, TrueBB);
    634   MF->insert(It, EndBB);
    635 
    636   // Transfer rest of current basic-block to EndBB
    637   EndBB->splice(EndBB->begin(), MBB,
    638                 llvm::next(MachineBasicBlock::iterator(MI)),
    639                 MBB->end());
    640   EndBB->transferSuccessorsAndUpdatePHIs(MBB);
    641 
    642   // We need somewhere to store the f128 value needed.
    643   int ScratchFI = MF->getFrameInfo()->CreateSpillStackObject(16, 16);
    644 
    645   //     [... start of incoming MBB ...]
    646   //     str qIFFALSE, [sp]
    647   //     b.cc IfTrue
    648   //     b Done
    649   BuildMI(MBB, DL, TII->get(AArch64::LSFP128_STR))
    650     .addReg(IfFalseReg)
    651     .addFrameIndex(ScratchFI)
    652     .addImm(0);
    653   BuildMI(MBB, DL, TII->get(AArch64::Bcc))
    654     .addImm(CondCode)
    655     .addMBB(TrueBB);
    656   BuildMI(MBB, DL, TII->get(AArch64::Bimm))
    657     .addMBB(EndBB);
    658   MBB->addSuccessor(TrueBB);
    659   MBB->addSuccessor(EndBB);
    660 
    661   // IfTrue:
    662   //     str qIFTRUE, [sp]
    663   BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR))
    664     .addReg(IfTrueReg)
    665     .addFrameIndex(ScratchFI)
    666     .addImm(0);
    667 
    668   // Note: fallthrough. We can rely on LLVM adding a branch if it reorders the
    669   // blocks.
    670   TrueBB->addSuccessor(EndBB);
    671 
    672   // Done:
    673   //     ldr qDEST, [sp]
    674   //     [... rest of incoming MBB ...]
    675   if (!NZCVKilled)
    676     EndBB->addLiveIn(AArch64::NZCV);
    677   MachineInstr *StartOfEnd = EndBB->begin();
    678   BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg)
    679     .addFrameIndex(ScratchFI)
    680     .addImm(0);
    681 
    682   MI->eraseFromParent();
    683   return EndBB;
    684 }
    685 
    686 MachineBasicBlock *
    687 AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
    688                                                  MachineBasicBlock *MBB) const {
    689   switch (MI->getOpcode()) {
    690   default: llvm_unreachable("Unhandled instruction with custom inserter");
    691   case AArch64::F128CSEL:
    692     return EmitF128CSEL(MI, MBB);
    693   case AArch64::ATOMIC_LOAD_ADD_I8:
    694     return emitAtomicBinary(MI, MBB, 1, AArch64::ADDwww_lsl);
    695   case AArch64::ATOMIC_LOAD_ADD_I16:
    696     return emitAtomicBinary(MI, MBB, 2, AArch64::ADDwww_lsl);
    697   case AArch64::ATOMIC_LOAD_ADD_I32:
    698     return emitAtomicBinary(MI, MBB, 4, AArch64::ADDwww_lsl);
    699   case AArch64::ATOMIC_LOAD_ADD_I64:
    700     return emitAtomicBinary(MI, MBB, 8, AArch64::ADDxxx_lsl);
    701 
    702   case AArch64::ATOMIC_LOAD_SUB_I8:
    703     return emitAtomicBinary(MI, MBB, 1, AArch64::SUBwww_lsl);
    704   case AArch64::ATOMIC_LOAD_SUB_I16:
    705     return emitAtomicBinary(MI, MBB, 2, AArch64::SUBwww_lsl);
    706   case AArch64::ATOMIC_LOAD_SUB_I32:
    707     return emitAtomicBinary(MI, MBB, 4, AArch64::SUBwww_lsl);
    708   case AArch64::ATOMIC_LOAD_SUB_I64:
    709     return emitAtomicBinary(MI, MBB, 8, AArch64::SUBxxx_lsl);
    710 
    711   case AArch64::ATOMIC_LOAD_AND_I8:
    712     return emitAtomicBinary(MI, MBB, 1, AArch64::ANDwww_lsl);
    713   case AArch64::ATOMIC_LOAD_AND_I16:
    714     return emitAtomicBinary(MI, MBB, 2, AArch64::ANDwww_lsl);
    715   case AArch64::ATOMIC_LOAD_AND_I32:
    716     return emitAtomicBinary(MI, MBB, 4, AArch64::ANDwww_lsl);
    717   case AArch64::ATOMIC_LOAD_AND_I64:
    718     return emitAtomicBinary(MI, MBB, 8, AArch64::ANDxxx_lsl);
    719 
    720   case AArch64::ATOMIC_LOAD_OR_I8:
    721     return emitAtomicBinary(MI, MBB, 1, AArch64::ORRwww_lsl);
    722   case AArch64::ATOMIC_LOAD_OR_I16:
    723     return emitAtomicBinary(MI, MBB, 2, AArch64::ORRwww_lsl);
    724   case AArch64::ATOMIC_LOAD_OR_I32:
    725     return emitAtomicBinary(MI, MBB, 4, AArch64::ORRwww_lsl);
    726   case AArch64::ATOMIC_LOAD_OR_I64:
    727     return emitAtomicBinary(MI, MBB, 8, AArch64::ORRxxx_lsl);
    728 
    729   case AArch64::ATOMIC_LOAD_XOR_I8:
    730     return emitAtomicBinary(MI, MBB, 1, AArch64::EORwww_lsl);
    731   case AArch64::ATOMIC_LOAD_XOR_I16:
    732     return emitAtomicBinary(MI, MBB, 2, AArch64::EORwww_lsl);
    733   case AArch64::ATOMIC_LOAD_XOR_I32:
    734     return emitAtomicBinary(MI, MBB, 4, AArch64::EORwww_lsl);
    735   case AArch64::ATOMIC_LOAD_XOR_I64:
    736     return emitAtomicBinary(MI, MBB, 8, AArch64::EORxxx_lsl);
    737 
    738   case AArch64::ATOMIC_LOAD_NAND_I8:
    739     return emitAtomicBinary(MI, MBB, 1, AArch64::BICwww_lsl);
    740   case AArch64::ATOMIC_LOAD_NAND_I16:
    741     return emitAtomicBinary(MI, MBB, 2, AArch64::BICwww_lsl);
    742   case AArch64::ATOMIC_LOAD_NAND_I32:
    743     return emitAtomicBinary(MI, MBB, 4, AArch64::BICwww_lsl);
    744   case AArch64::ATOMIC_LOAD_NAND_I64:
    745     return emitAtomicBinary(MI, MBB, 8, AArch64::BICxxx_lsl);
    746 
    747   case AArch64::ATOMIC_LOAD_MIN_I8:
    748     return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::GT);
    749   case AArch64::ATOMIC_LOAD_MIN_I16:
    750     return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::GT);
    751   case AArch64::ATOMIC_LOAD_MIN_I32:
    752     return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::GT);
    753   case AArch64::ATOMIC_LOAD_MIN_I64:
    754     return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::GT);
    755 
    756   case AArch64::ATOMIC_LOAD_MAX_I8:
    757     return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_sxtb, A64CC::LT);
    758   case AArch64::ATOMIC_LOAD_MAX_I16:
    759     return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_sxth, A64CC::LT);
    760   case AArch64::ATOMIC_LOAD_MAX_I32:
    761     return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LT);
    762   case AArch64::ATOMIC_LOAD_MAX_I64:
    763     return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LT);
    764 
    765   case AArch64::ATOMIC_LOAD_UMIN_I8:
    766     return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::HI);
    767   case AArch64::ATOMIC_LOAD_UMIN_I16:
    768     return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::HI);
    769   case AArch64::ATOMIC_LOAD_UMIN_I32:
    770     return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::HI);
    771   case AArch64::ATOMIC_LOAD_UMIN_I64:
    772     return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::HI);
    773 
    774   case AArch64::ATOMIC_LOAD_UMAX_I8:
    775     return emitAtomicBinaryMinMax(MI, MBB, 1, AArch64::CMPww_uxtb, A64CC::LO);
    776   case AArch64::ATOMIC_LOAD_UMAX_I16:
    777     return emitAtomicBinaryMinMax(MI, MBB, 2, AArch64::CMPww_uxth, A64CC::LO);
    778   case AArch64::ATOMIC_LOAD_UMAX_I32:
    779     return emitAtomicBinaryMinMax(MI, MBB, 4, AArch64::CMPww_lsl, A64CC::LO);
    780   case AArch64::ATOMIC_LOAD_UMAX_I64:
    781     return emitAtomicBinaryMinMax(MI, MBB, 8, AArch64::CMPxx_lsl, A64CC::LO);
    782 
    783   case AArch64::ATOMIC_SWAP_I8:
    784     return emitAtomicBinary(MI, MBB, 1, 0);
    785   case AArch64::ATOMIC_SWAP_I16:
    786     return emitAtomicBinary(MI, MBB, 2, 0);
    787   case AArch64::ATOMIC_SWAP_I32:
    788     return emitAtomicBinary(MI, MBB, 4, 0);
    789   case AArch64::ATOMIC_SWAP_I64:
    790     return emitAtomicBinary(MI, MBB, 8, 0);
    791 
    792   case AArch64::ATOMIC_CMP_SWAP_I8:
    793     return emitAtomicCmpSwap(MI, MBB, 1);
    794   case AArch64::ATOMIC_CMP_SWAP_I16:
    795     return emitAtomicCmpSwap(MI, MBB, 2);
    796   case AArch64::ATOMIC_CMP_SWAP_I32:
    797     return emitAtomicCmpSwap(MI, MBB, 4);
    798   case AArch64::ATOMIC_CMP_SWAP_I64:
    799     return emitAtomicCmpSwap(MI, MBB, 8);
    800   }
    801 }
    802 
    803 
    804 const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
    805   switch (Opcode) {
    806   case AArch64ISD::BR_CC:          return "AArch64ISD::BR_CC";
    807   case AArch64ISD::Call:           return "AArch64ISD::Call";
    808   case AArch64ISD::FPMOV:          return "AArch64ISD::FPMOV";
    809   case AArch64ISD::GOTLoad:        return "AArch64ISD::GOTLoad";
    810   case AArch64ISD::BFI:            return "AArch64ISD::BFI";
    811   case AArch64ISD::EXTR:           return "AArch64ISD::EXTR";
    812   case AArch64ISD::Ret:            return "AArch64ISD::Ret";
    813   case AArch64ISD::SBFX:           return "AArch64ISD::SBFX";
    814   case AArch64ISD::SELECT_CC:      return "AArch64ISD::SELECT_CC";
    815   case AArch64ISD::SETCC:          return "AArch64ISD::SETCC";
    816   case AArch64ISD::TC_RETURN:      return "AArch64ISD::TC_RETURN";
    817   case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
    818   case AArch64ISD::TLSDESCCALL:    return "AArch64ISD::TLSDESCCALL";
    819   case AArch64ISD::WrapperLarge:   return "AArch64ISD::WrapperLarge";
    820   case AArch64ISD::WrapperSmall:   return "AArch64ISD::WrapperSmall";
    821 
    822   case AArch64ISD::NEON_BSL:
    823     return "AArch64ISD::NEON_BSL";
    824   case AArch64ISD::NEON_MOVIMM:
    825     return "AArch64ISD::NEON_MOVIMM";
    826   case AArch64ISD::NEON_MVNIMM:
    827     return "AArch64ISD::NEON_MVNIMM";
    828   case AArch64ISD::NEON_FMOVIMM:
    829     return "AArch64ISD::NEON_FMOVIMM";
    830   case AArch64ISD::NEON_CMP:
    831     return "AArch64ISD::NEON_CMP";
    832   case AArch64ISD::NEON_CMPZ:
    833     return "AArch64ISD::NEON_CMPZ";
    834   case AArch64ISD::NEON_TST:
    835     return "AArch64ISD::NEON_TST";
    836   default:
    837     return NULL;
    838   }
    839 }
    840 
    841 static const uint16_t AArch64FPRArgRegs[] = {
    842   AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
    843   AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7
    844 };
    845 static const unsigned NumFPRArgRegs = llvm::array_lengthof(AArch64FPRArgRegs);
    846 
    847 static const uint16_t AArch64ArgRegs[] = {
    848   AArch64::X0, AArch64::X1, AArch64::X2, AArch64::X3,
    849   AArch64::X4, AArch64::X5, AArch64::X6, AArch64::X7
    850 };
    851 static const unsigned NumArgRegs = llvm::array_lengthof(AArch64ArgRegs);
    852 
    853 static bool CC_AArch64NoMoreRegs(unsigned ValNo, MVT ValVT, MVT LocVT,
    854                                  CCValAssign::LocInfo LocInfo,
    855                                  ISD::ArgFlagsTy ArgFlags, CCState &State) {
    856   // Mark all remaining general purpose registers as allocated. We don't
    857   // backtrack: if (for example) an i128 gets put on the stack, no subsequent
    858   // i64 will go in registers (C.11).
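           // Illustrative scenario: with X0-X6 already used, an i128 (which
           // needs an even-aligned register pair) is sent to the stack;
           // marking X7 allocated here means a later i64 also goes on the
           // stack rather than into X7.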
    859   for (unsigned i = 0; i < NumArgRegs; ++i)
    860     State.AllocateReg(AArch64ArgRegs[i]);
    861 
    862   return false;
    863 }
    864 
    865 #include "AArch64GenCallingConv.inc"
    866 
    867 CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const {
    868 
    869   switch(CC) {
     870   switch (CC) {
    871   case CallingConv::Fast:
    872   case CallingConv::C:
    873     return CC_A64_APCS;
    874   }
    875 }
    876 
    877 void
    878 AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG,
    879                                            SDLoc DL, SDValue &Chain) const {
    880   MachineFunction &MF = DAG.getMachineFunction();
    881   MachineFrameInfo *MFI = MF.getFrameInfo();
    882   AArch64MachineFunctionInfo *FuncInfo
    883     = MF.getInfo<AArch64MachineFunctionInfo>();
    884 
    885   SmallVector<SDValue, 8> MemOps;
    886 
    887   unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(AArch64ArgRegs,
    888                                                          NumArgRegs);
    889   unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(AArch64FPRArgRegs,
    890                                                          NumFPRArgRegs);
    891 
    892   unsigned GPRSaveSize = 8 * (NumArgRegs - FirstVariadicGPR);
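           // Worked example (illustrative): a variadic function with three
           // fixed integer arguments has FirstVariadicGPR == 3, so X3-X7 are
           // saved and GPRSaveSize is 5 * 8 == 40 bytes.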
    893   int GPRIdx = 0;
    894   if (GPRSaveSize != 0) {
    895     GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
    896 
    897     SDValue FIN = DAG.getFrameIndex(GPRIdx, getPointerTy());
    898 
    899     for (unsigned i = FirstVariadicGPR; i < NumArgRegs; ++i) {
    900       unsigned VReg = MF.addLiveIn(AArch64ArgRegs[i], &AArch64::GPR64RegClass);
    901       SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
    902       SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
    903                                    MachinePointerInfo::getStack(i * 8),
    904                                    false, false, 0);
    905       MemOps.push_back(Store);
    906       FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
    907                         DAG.getConstant(8, getPointerTy()));
    908     }
    909   }
    910 
    911   unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
    912   int FPRIdx = 0;
    913   if (FPRSaveSize != 0) {
    914     FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
    915 
    916     SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy());
    917 
    918     for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
    919       unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i],
    920                                    &AArch64::FPR128RegClass);
    921       SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
    922       SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN,
    923                                    MachinePointerInfo::getStack(i * 16),
    924                                    false, false, 0);
    925       MemOps.push_back(Store);
    926       FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN,
    927                         DAG.getConstant(16, getPointerTy()));
    928     }
    929   }
    930 
    931   int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true);
    932 
    933   FuncInfo->setVariadicStackIdx(StackIdx);
    934   FuncInfo->setVariadicGPRIdx(GPRIdx);
    935   FuncInfo->setVariadicGPRSize(GPRSaveSize);
    936   FuncInfo->setVariadicFPRIdx(FPRIdx);
    937   FuncInfo->setVariadicFPRSize(FPRSaveSize);
    938 
    939   if (!MemOps.empty()) {
    940     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
    941                         MemOps.size());
    942   }
    943 }
    944 
    945 
    946 SDValue
    947 AArch64TargetLowering::LowerFormalArguments(SDValue Chain,
    948                                       CallingConv::ID CallConv, bool isVarArg,
    949                                       const SmallVectorImpl<ISD::InputArg> &Ins,
    950                                       SDLoc dl, SelectionDAG &DAG,
    951                                       SmallVectorImpl<SDValue> &InVals) const {
    952   MachineFunction &MF = DAG.getMachineFunction();
    953   AArch64MachineFunctionInfo *FuncInfo
    954     = MF.getInfo<AArch64MachineFunctionInfo>();
    955   MachineFrameInfo *MFI = MF.getFrameInfo();
    956   bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
    957 
    958   SmallVector<CCValAssign, 16> ArgLocs;
    959   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
    960                  getTargetMachine(), ArgLocs, *DAG.getContext());
    961   CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForNode(CallConv));
    962 
    963   SmallVector<SDValue, 16> ArgValues;
    964 
    965   SDValue ArgValue;
    966   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    967     CCValAssign &VA = ArgLocs[i];
    968     ISD::ArgFlagsTy Flags = Ins[i].Flags;
    969 
    970     if (Flags.isByVal()) {
     971       // Byval is used for small structs and HFAs in the PCS, but this code
     972       // should still handle larger structs, albeit in a non-compliant manner.
    973       EVT PtrTy = getPointerTy();
    974       int Size = Flags.getByValSize();
    975       unsigned NumRegs = (Size + 7) / 8;
    976 
    977       unsigned FrameIdx = MFI->CreateFixedObject(8 * NumRegs,
    978                                                  VA.getLocMemOffset(),
    979                                                  false);
    980       SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrTy);
    981       InVals.push_back(FrameIdxN);
    982 
    983       continue;
    984     } else if (VA.isRegLoc()) {
    985       MVT RegVT = VA.getLocVT();
    986       const TargetRegisterClass *RC = getRegClassFor(RegVT);
    987       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
    988 
    989       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
    990     } else { // VA.isRegLoc()
    991       assert(VA.isMemLoc());
    992 
    993       int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
    994                                       VA.getLocMemOffset(), true);
    995 
    996       SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
    997       ArgValue = DAG.getLoad(VA.getLocVT(), dl, Chain, FIN,
    998                              MachinePointerInfo::getFixedStack(FI),
    999                              false, false, false, 0);
   1000 
   1001 
   1002     }
   1003 
   1004     switch (VA.getLocInfo()) {
   1005     default: llvm_unreachable("Unknown loc info!");
   1006     case CCValAssign::Full: break;
   1007     case CCValAssign::BCvt:
   1008       ArgValue = DAG.getNode(ISD::BITCAST,dl, VA.getValVT(), ArgValue);
   1009       break;
   1010     case CCValAssign::SExt:
   1011     case CCValAssign::ZExt:
   1012     case CCValAssign::AExt: {
   1013       unsigned DestSize = VA.getValVT().getSizeInBits();
   1014       unsigned DestSubReg;
   1015 
   1016       switch (DestSize) {
   1017       case 8: DestSubReg = AArch64::sub_8; break;
   1018       case 16: DestSubReg = AArch64::sub_16; break;
   1019       case 32: DestSubReg = AArch64::sub_32; break;
   1020       case 64: DestSubReg = AArch64::sub_64; break;
   1021       default: llvm_unreachable("Unexpected argument promotion");
   1022       }
   1023 
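               // The value arrived widened into a full register; extract just
               // the sub-register holding the original type (e.g. sub_8 of
               // the 32-bit location for an i8 argument).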
   1024       ArgValue = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
   1025                                    VA.getValVT(), ArgValue,
   1026                                    DAG.getTargetConstant(DestSubReg, MVT::i32)),
   1027                          0);
   1028       break;
   1029     }
   1030     }
   1031 
   1032     InVals.push_back(ArgValue);
   1033   }
   1034 
   1035   if (isVarArg)
   1036     SaveVarArgRegisters(CCInfo, DAG, dl, Chain);
   1037 
   1038   unsigned StackArgSize = CCInfo.getNextStackOffset();
   1039   if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
   1040     // This is a non-standard ABI so by fiat I say we're allowed to make full
   1041     // use of the stack area to be popped, which must be aligned to 16 bytes in
   1042     // any case:
   1043     StackArgSize = RoundUpToAlignment(StackArgSize, 16);
   1044 
   1045     // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
   1046     // a multiple of 16.
   1047     FuncInfo->setArgumentStackToRestore(StackArgSize);
   1048 
   1049     // This realignment carries over to the available bytes below. Our own
   1050     // callers will guarantee the space is free by giving an aligned value to
   1051     // CALLSEQ_START.
   1052   }
   1053   // Even if we're not expected to free up the space, it's useful to know how
   1054   // much is there while considering tail calls (because we can reuse it).
   1055   FuncInfo->setBytesInStackArgArea(StackArgSize);
   1056 
   1057   return Chain;
   1058 }
   1059 
   1060 SDValue
   1061 AArch64TargetLowering::LowerReturn(SDValue Chain,
   1062                                    CallingConv::ID CallConv, bool isVarArg,
   1063                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
   1064                                    const SmallVectorImpl<SDValue> &OutVals,
   1065                                    SDLoc dl, SelectionDAG &DAG) const {
   1066   // CCValAssign - represent the assignment of the return value to a location.
   1067   SmallVector<CCValAssign, 16> RVLocs;
   1068 
   1069   // CCState - Info about the registers and stack slots.
   1070   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
   1071                  getTargetMachine(), RVLocs, *DAG.getContext());
   1072 
   1073   // Analyze outgoing return values.
   1074   CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv));
   1075 
   1076   SDValue Flag;
   1077   SmallVector<SDValue, 4> RetOps(1, Chain);
   1078 
   1079   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
   1080     // PCS: "If the type, T, of the result of a function is such that
   1081     // void func(T arg) would require that arg be passed as a value in a
   1082     // register (or set of registers) according to the rules in 5.4, then the
   1083     // result is returned in the same registers as would be used for such an
   1084     // argument.
   1085     //
   1086     // Otherwise, the caller shall reserve a block of memory of sufficient
   1087     // size and alignment to hold the result. The address of the memory block
   1088     // shall be passed as an additional argument to the function in x8."
   1089     //
   1090     // This is implemented in two places. The register-return values are dealt
    1091     // with here; more complex returns are passed as an sret parameter, which
   1092     // means we don't have to worry about it during actual return.
   1093     CCValAssign &VA = RVLocs[i];
   1094     assert(VA.isRegLoc() && "Only register-returns should be created by PCS");
   1095 
   1096 
   1097     SDValue Arg = OutVals[i];
   1098 
   1099     // There's no convenient note in the ABI about this as there is for normal
   1100     // arguments, but it says return values are passed in the same registers as
   1101     // an argument would be. I believe that includes the comments about
   1102     // unspecified higher bits, putting the burden of widening on the *caller*
   1103     // for return values.
   1104     switch (VA.getLocInfo()) {
   1105     default: llvm_unreachable("Unknown loc info");
   1106     case CCValAssign::Full: break;
   1107     case CCValAssign::SExt:
   1108     case CCValAssign::ZExt:
   1109     case CCValAssign::AExt:
   1110       // Floating-point values should only be extended when they're going into
   1111       // memory, which can't happen here so an integer extend is acceptable.
   1112       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
   1113       break;
   1114     case CCValAssign::BCvt:
   1115       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   1116       break;
   1117     }
   1118 
   1119     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
   1120     Flag = Chain.getValue(1);
   1121     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   1122   }
   1123 
   1124   RetOps[0] = Chain;  // Update chain.
   1125 
   1126   // Add the flag if we have it.
   1127   if (Flag.getNode())
   1128     RetOps.push_back(Flag);
   1129 
   1130   return DAG.getNode(AArch64ISD::Ret, dl, MVT::Other,
   1131                      &RetOps[0], RetOps.size());
   1132 }
   1133 
   1134 SDValue
   1135 AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
   1136                                  SmallVectorImpl<SDValue> &InVals) const {
   1137   SelectionDAG &DAG                     = CLI.DAG;
   1138   SDLoc &dl                             = CLI.DL;
   1139   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   1140   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
   1141   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
   1142   SDValue Chain                         = CLI.Chain;
   1143   SDValue Callee                        = CLI.Callee;
   1144   bool &IsTailCall                      = CLI.IsTailCall;
   1145   CallingConv::ID CallConv              = CLI.CallConv;
   1146   bool IsVarArg                         = CLI.IsVarArg;
   1147 
   1148   MachineFunction &MF = DAG.getMachineFunction();
   1149   AArch64MachineFunctionInfo *FuncInfo
   1150     = MF.getInfo<AArch64MachineFunctionInfo>();
   1151   bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
   1152   bool IsStructRet = !Outs.empty() && Outs[0].Flags.isSRet();
   1153   bool IsSibCall = false;
   1154 
   1155   if (IsTailCall) {
   1156     IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
   1157                     IsVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
   1158                                                    Outs, OutVals, Ins, DAG);
   1159 
   1160     // A sibling call is one where we're under the usual C ABI and not planning
   1161     // to change that but can still do a tail call:
   1162     if (!TailCallOpt && IsTailCall)
   1163       IsSibCall = true;
   1164   }
   1165 
   1166   SmallVector<CCValAssign, 16> ArgLocs;
   1167   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
   1168                  getTargetMachine(), ArgLocs, *DAG.getContext());
   1169   CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));
   1170 
    1171   // On AArch64 (and all other architectures I'm aware of) the most the call
    1172   // sequence setup has to do is adjust the stack pointer.
   1173   unsigned NumBytes = RoundUpToAlignment(CCInfo.getNextStackOffset(), 16);
   1174   if (IsSibCall) {
   1175     // Since we're not changing the ABI to make this a tail call, the memory
   1176     // operands are already available in the caller's incoming argument space.
   1177     NumBytes = 0;
   1178   }
   1179 
   1180   // FPDiff is the byte offset of the call's argument area from the callee's.
   1181   // Stores to callee stack arguments will be placed in FixedStackSlots offset
   1182   // by this amount for a tail call. In a sibling call it must be 0 because the
   1183   // caller will deallocate the entire stack and the callee still expects its
   1184   // arguments to begin at SP+0. Completely unused for non-tail calls.
   1185   int FPDiff = 0;
   1186 
   1187   if (IsTailCall && !IsSibCall) {
   1188     unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
   1189 
   1190     // FPDiff will be negative if this tail call requires more space than we
   1191     // would automatically have in our incoming argument space. Positive if we
   1192     // can actually shrink the stack.
   1193     FPDiff = NumReusableBytes - NumBytes;
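             // Illustrative numbers: if our own incoming argument area is 32
             // bytes but this tail call needs 48 bytes of outgoing arguments,
             // FPDiff is 32 - 48 = -16, i.e. 16 bytes more than we can reuse.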
   1194 
   1195     // The stack pointer must be 16-byte aligned at all times it's used for a
   1196     // memory operation, which in practice means at *all* times and in
   1197     // particular across call boundaries. Therefore our own arguments started at
   1198     // a 16-byte aligned SP and the delta applied for the tail call should
   1199     // satisfy the same constraint.
   1200     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
   1201   }
   1202 
   1203   if (!IsSibCall)
   1204     Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
   1205                                  dl);
   1206 
   1207   SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP,
   1208                                         getPointerTy());
   1209 
   1210   SmallVector<SDValue, 8> MemOpChains;
   1211   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
   1212 
   1213   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1214     CCValAssign &VA = ArgLocs[i];
   1215     ISD::ArgFlagsTy Flags = Outs[i].Flags;
   1216     SDValue Arg = OutVals[i];
   1217 
   1218     // Callee does the actual widening, so all extensions just use an implicit
   1219     // definition of the rest of the Loc. Aesthetically, this would be nicer as
   1220     // an ANY_EXTEND, but that isn't valid for floating-point types and this
   1221     // alternative works on integer types too.
   1222     switch (VA.getLocInfo()) {
   1223     default: llvm_unreachable("Unknown loc info!");
   1224     case CCValAssign::Full: break;
   1225     case CCValAssign::SExt:
   1226     case CCValAssign::ZExt:
   1227     case CCValAssign::AExt: {
   1228       unsigned SrcSize = VA.getValVT().getSizeInBits();
   1229       unsigned SrcSubReg;
   1230 
   1231       switch (SrcSize) {
   1232       case 8: SrcSubReg = AArch64::sub_8; break;
   1233       case 16: SrcSubReg = AArch64::sub_16; break;
   1234       case 32: SrcSubReg = AArch64::sub_32; break;
   1235       case 64: SrcSubReg = AArch64::sub_64; break;
   1236       default: llvm_unreachable("Unexpected argument promotion");
   1237       }
   1238 
   1239       Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, dl,
   1240                                     VA.getLocVT(),
   1241                                     DAG.getUNDEF(VA.getLocVT()),
   1242                                     Arg,
   1243                                     DAG.getTargetConstant(SrcSubReg, MVT::i32)),
   1244                     0);
   1245 
   1246       break;
   1247     }
   1248     case CCValAssign::BCvt:
   1249       Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
   1250       break;
   1251     }
   1252 
   1253     if (VA.isRegLoc()) {
    1254       // A normal register (sub-)argument. For now we just note it down because
    1255       // we want to copy values into registers as late as possible to avoid
    1256       // increasing register pressure (and possibly worse).
   1257       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
   1258       continue;
   1259     }
   1260 
   1261     assert(VA.isMemLoc() && "unexpected argument location");
   1262 
   1263     SDValue DstAddr;
   1264     MachinePointerInfo DstInfo;
   1265     if (IsTailCall) {
   1266       uint32_t OpSize = Flags.isByVal() ? Flags.getByValSize() :
   1267                                           VA.getLocVT().getSizeInBits();
   1268       OpSize = (OpSize + 7) / 8;
   1269       int32_t Offset = VA.getLocMemOffset() + FPDiff;
   1270       int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
   1271 
   1272       DstAddr = DAG.getFrameIndex(FI, getPointerTy());
   1273       DstInfo = MachinePointerInfo::getFixedStack(FI);
   1274 
   1275       // Make sure any stack arguments overlapping with where we're storing are
   1276       // loaded before this eventual operation. Otherwise they'll be clobbered.
   1277       Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
   1278     } else {
   1279       SDValue PtrOff = DAG.getIntPtrConstant(VA.getLocMemOffset());
   1280 
   1281       DstAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   1282       DstInfo = MachinePointerInfo::getStack(VA.getLocMemOffset());
   1283     }
   1284 
   1285     if (Flags.isByVal()) {
   1286       SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i64);
   1287       SDValue Cpy = DAG.getMemcpy(Chain, dl, DstAddr, Arg, SizeNode,
   1288                                   Flags.getByValAlign(),
   1289                                   /*isVolatile = */ false,
   1290                                   /*alwaysInline = */ false,
   1291                                   DstInfo, MachinePointerInfo(0));
   1292       MemOpChains.push_back(Cpy);
   1293     } else {
   1294       // Normal stack argument, put it where it's needed.
   1295       SDValue Store = DAG.getStore(Chain, dl, Arg, DstAddr, DstInfo,
   1296                                    false, false, 0);
   1297       MemOpChains.push_back(Store);
   1298     }
   1299   }
   1300 
   1301   // The loads and stores generated above shouldn't clash with each
   1302   // other. Combining them with this TokenFactor notes that fact for the rest of
   1303   // the backend.
   1304   if (!MemOpChains.empty())
   1305     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   1306                         &MemOpChains[0], MemOpChains.size());
   1307 
   1308   // Most of the rest of the instructions need to be glued together; we don't
   1309   // want assignments to actual registers used by a call to be rearranged by a
   1310   // well-meaning scheduler.
   1311   SDValue InFlag;
   1312 
   1313   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
   1314     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
   1315                              RegsToPass[i].second, InFlag);
   1316     InFlag = Chain.getValue(1);
   1317   }
   1318 
   1319   // The linker is responsible for inserting veneers when necessary to put a
   1320   // function call destination in range, so we don't need to bother with a
   1321   // wrapper here.
   1322   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   1323     const GlobalValue *GV = G->getGlobal();
   1324     Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
   1325   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
   1326     const char *Sym = S->getSymbol();
   1327     Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy());
   1328   }
   1329 
    1330   // We don't usually want to end the call sequence here because we would tidy
    1331   // the frame up *after* the call. However, in the ABI-changing tail-call case
   1332   // we've carefully laid out the parameters so that when sp is reset they'll be
   1333   // in the correct location.
   1334   if (IsTailCall && !IsSibCall) {
   1335     Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
   1336                                DAG.getIntPtrConstant(0, true), InFlag, dl);
   1337     InFlag = Chain.getValue(1);
   1338   }
   1339 
   1340   // We produce the following DAG scheme for the actual call instruction:
    1341   //     (AArch64Call Chain, Callee, reg1, ..., regn, preserveMask, inflag?)
   1342   //
   1343   // Most arguments aren't going to be used and just keep the values live as
   1344   // far as LLVM is concerned. It's expected to be selected as simply "bl
   1345   // callee" (for a direct, non-tail call).
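           // As a purely illustrative sketch (not an exact dump), a direct call to
           // "int foo(int, int)" under the C convention might look roughly like:
           //     (AArch64Call Chain, TargetGlobalAddress:foo, Register:W0, Register:W1,
           //      RegisterMask, Glue)
           // with W0 and W1 carrying the two integer arguments.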
   1346   std::vector<SDValue> Ops;
   1347   Ops.push_back(Chain);
   1348   Ops.push_back(Callee);
   1349 
   1350   if (IsTailCall) {
   1351     // Each tail call may have to adjust the stack by a different amount, so
   1352     // this information must travel along with the operation for eventual
   1353     // consumption by emitEpilogue.
   1354     Ops.push_back(DAG.getTargetConstant(FPDiff, MVT::i32));
   1355   }
   1356 
   1357   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
   1358     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
   1359                                   RegsToPass[i].second.getValueType()));
   1360 
   1361 
   1362   // Add a register mask operand representing the call-preserved registers. This
   1363   // is used later in codegen to constrain register-allocation.
   1364   const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
   1365   const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
   1366   assert(Mask && "Missing call preserved mask for calling convention");
   1367   Ops.push_back(DAG.getRegisterMask(Mask));
   1368 
   1369   // If we needed glue, put it in as the last argument.
   1370   if (InFlag.getNode())
   1371     Ops.push_back(InFlag);
   1372 
   1373   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   1374 
   1375   if (IsTailCall) {
   1376     return DAG.getNode(AArch64ISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
   1377   }
   1378 
   1379   Chain = DAG.getNode(AArch64ISD::Call, dl, NodeTys, &Ops[0], Ops.size());
   1380   InFlag = Chain.getValue(1);
   1381 
    1382   // Now we can reclaim the stack; we may as well do it before working out where
    1383   // our return value is.
   1384   if (!IsSibCall) {
   1385     uint64_t CalleePopBytes
   1386       = DoesCalleeRestoreStack(CallConv, TailCallOpt) ? NumBytes : 0;
   1387 
   1388     Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
   1389                                DAG.getIntPtrConstant(CalleePopBytes, true),
   1390                                InFlag, dl);
   1391     InFlag = Chain.getValue(1);
   1392   }
   1393 
   1394   return LowerCallResult(Chain, InFlag, CallConv,
   1395                          IsVarArg, Ins, dl, DAG, InVals);
   1396 }
   1397 
   1398 SDValue
   1399 AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
   1400                                       CallingConv::ID CallConv, bool IsVarArg,
   1401                                       const SmallVectorImpl<ISD::InputArg> &Ins,
   1402                                       SDLoc dl, SelectionDAG &DAG,
   1403                                       SmallVectorImpl<SDValue> &InVals) const {
   1404   // Assign locations to each value returned by this call.
   1405   SmallVector<CCValAssign, 16> RVLocs;
   1406   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(),
   1407                  getTargetMachine(), RVLocs, *DAG.getContext());
   1408   CCInfo.AnalyzeCallResult(Ins, CCAssignFnForNode(CallConv));
   1409 
   1410   for (unsigned i = 0; i != RVLocs.size(); ++i) {
   1411     CCValAssign VA = RVLocs[i];
   1412 
   1413     // Return values that are too big to fit into registers should use an sret
   1414     // pointer, so this can be a lot simpler than the main argument code.
   1415     assert(VA.isRegLoc() && "Memory locations not expected for call return");
   1416 
   1417     SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
   1418                                      InFlag);
   1419     Chain = Val.getValue(1);
   1420     InFlag = Val.getValue(2);
   1421 
   1422     switch (VA.getLocInfo()) {
   1423     default: llvm_unreachable("Unknown loc info!");
   1424     case CCValAssign::Full: break;
   1425     case CCValAssign::BCvt:
   1426       Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
   1427       break;
   1428     case CCValAssign::ZExt:
   1429     case CCValAssign::SExt:
   1430     case CCValAssign::AExt:
   1431       // Floating-point arguments only get extended/truncated if they're going
   1432       // in memory, so using the integer operation is acceptable here.
   1433       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
   1434       break;
   1435     }
   1436 
   1437     InVals.push_back(Val);
   1438   }
   1439 
   1440   return Chain;
   1441 }
   1442 
   1443 bool
   1444 AArch64TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
   1445                                     CallingConv::ID CalleeCC,
   1446                                     bool IsVarArg,
   1447                                     bool IsCalleeStructRet,
   1448                                     bool IsCallerStructRet,
   1449                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
   1450                                     const SmallVectorImpl<SDValue> &OutVals,
   1451                                     const SmallVectorImpl<ISD::InputArg> &Ins,
   1452                                     SelectionDAG& DAG) const {
   1453 
   1454   // For CallingConv::C this function knows whether the ABI needs
   1455   // changing. That's not true for other conventions so they will have to opt in
   1456   // manually.
   1457   if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
   1458     return false;
   1459 
   1460   const MachineFunction &MF = DAG.getMachineFunction();
   1461   const Function *CallerF = MF.getFunction();
   1462   CallingConv::ID CallerCC = CallerF->getCallingConv();
   1463   bool CCMatch = CallerCC == CalleeCC;
   1464 
   1465   // Byval parameters hand the function a pointer directly into the stack area
   1466   // we want to reuse during a tail call. Working around this *is* possible (see
   1467   // X86) but less efficient and uglier in LowerCall.
   1468   for (Function::const_arg_iterator i = CallerF->arg_begin(),
   1469          e = CallerF->arg_end(); i != e; ++i)
   1470     if (i->hasByValAttr())
   1471       return false;
   1472 
   1473   if (getTargetMachine().Options.GuaranteedTailCallOpt) {
   1474     if (IsTailCallConvention(CalleeCC) && CCMatch)
   1475       return true;
   1476     return false;
   1477   }
   1478 
   1479   // Now we search for cases where we can use a tail call without changing the
   1480   // ABI. Sibcall is used in some places (particularly gcc) to refer to this
   1481   // concept.
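           // For example, a plain C function whose body is just "return g(x);" can
           // usually branch straight to g: the argument is already where g expects it
           // and no extra stack clean-up is needed, so the ABI doesn't change.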
   1482 
   1483   // I want anyone implementing a new calling convention to think long and hard
   1484   // about this assert.
   1485   assert((!IsVarArg || CalleeCC == CallingConv::C)
   1486          && "Unexpected variadic calling convention");
   1487 
   1488   if (IsVarArg && !Outs.empty()) {
   1489     // At least two cases here: if caller is fastcc then we can't have any
   1490     // memory arguments (we'd be expected to clean up the stack afterwards). If
   1491     // caller is C then we could potentially use its argument area.
   1492 
   1493     // FIXME: for now we take the most conservative of these in both cases:
   1494     // disallow all variadic memory operands.
   1495     SmallVector<CCValAssign, 16> ArgLocs;
   1496     CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
   1497                    getTargetMachine(), ArgLocs, *DAG.getContext());
   1498 
   1499     CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
   1500     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
   1501       if (!ArgLocs[i].isRegLoc())
   1502         return false;
   1503   }
   1504 
    1505   // If the calling conventions do not match, then we'd better make sure the
    1506   // results are returned in the way the caller expects.
   1507   if (!CCMatch) {
   1508     SmallVector<CCValAssign, 16> RVLocs1;
   1509     CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(),
   1510                     getTargetMachine(), RVLocs1, *DAG.getContext());
   1511     CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC));
   1512 
   1513     SmallVector<CCValAssign, 16> RVLocs2;
   1514     CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(),
   1515                     getTargetMachine(), RVLocs2, *DAG.getContext());
   1516     CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC));
   1517 
   1518     if (RVLocs1.size() != RVLocs2.size())
   1519       return false;
   1520     for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) {
   1521       if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc())
   1522         return false;
   1523       if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo())
   1524         return false;
   1525       if (RVLocs1[i].isRegLoc()) {
   1526         if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg())
   1527           return false;
   1528       } else {
   1529         if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset())
   1530           return false;
   1531       }
   1532     }
   1533   }
   1534 
    1535   // Nothing more to check if the callee takes no arguments.
   1536   if (Outs.empty())
   1537     return true;
   1538 
   1539   SmallVector<CCValAssign, 16> ArgLocs;
   1540   CCState CCInfo(CalleeCC, IsVarArg, DAG.getMachineFunction(),
   1541                  getTargetMachine(), ArgLocs, *DAG.getContext());
   1542 
   1543   CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CalleeCC));
   1544 
   1545   const AArch64MachineFunctionInfo *FuncInfo
   1546     = MF.getInfo<AArch64MachineFunctionInfo>();
   1547 
   1548   // If the stack arguments for this call would fit into our own save area then
    1549   // the call can be made a tail call.
   1550   return CCInfo.getNextStackOffset() <= FuncInfo->getBytesInStackArgArea();
   1551 }
   1552 
   1553 bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
   1554                                                    bool TailCallOpt) const {
   1555   return CallCC == CallingConv::Fast && TailCallOpt;
   1556 }
   1557 
   1558 bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
   1559   return CallCC == CallingConv::Fast;
   1560 }
   1561 
   1562 SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
   1563                                                    SelectionDAG &DAG,
   1564                                                    MachineFrameInfo *MFI,
   1565                                                    int ClobberedFI) const {
   1566   SmallVector<SDValue, 8> ArgChains;
   1567   int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
   1568   int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
   1569 
   1570   // Include the original chain at the beginning of the list. When this is
    1571   // used by target LowerCall hooks, it helps legalization find the
    1572   // CALLSEQ_START node.
   1573   ArgChains.push_back(Chain);
   1574 
    1575   // Add a chain value for each stack argument load that overlaps ClobberedFI.
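           // For example, if the caller is about to overwrite the fixed slot holding
           // one of its own incoming stack arguments, any load from that slot feeding
           // the call must be chained in here so it happens before the clobbering store.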
   1576   for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
   1577          UE = DAG.getEntryNode().getNode()->use_end(); U != UE; ++U)
   1578     if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
   1579       if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
   1580         if (FI->getIndex() < 0) {
   1581           int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
   1582           int64_t InLastByte = InFirstByte;
   1583           InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
   1584 
   1585           if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
   1586               (FirstByte <= InFirstByte && InFirstByte <= LastByte))
   1587             ArgChains.push_back(SDValue(L, 1));
   1588         }
   1589 
    1590   // Build a TokenFactor for all the chains.
    1591   return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other,
    1592                      &ArgChains[0], ArgChains.size());
   1593 }
   1594 
   1595 static A64CC::CondCodes IntCCToA64CC(ISD::CondCode CC) {
   1596   switch (CC) {
   1597   case ISD::SETEQ:  return A64CC::EQ;
   1598   case ISD::SETGT:  return A64CC::GT;
   1599   case ISD::SETGE:  return A64CC::GE;
   1600   case ISD::SETLT:  return A64CC::LT;
   1601   case ISD::SETLE:  return A64CC::LE;
   1602   case ISD::SETNE:  return A64CC::NE;
   1603   case ISD::SETUGT: return A64CC::HI;
   1604   case ISD::SETUGE: return A64CC::HS;
   1605   case ISD::SETULT: return A64CC::LO;
   1606   case ISD::SETULE: return A64CC::LS;
   1607   default: llvm_unreachable("Unexpected condition code");
   1608   }
   1609 }
   1610 
   1611 bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const {
   1612   // icmp is implemented using adds/subs immediate, which take an unsigned
   1613   // 12-bit immediate, optionally shifted left by 12 bits.
   1614 
    1615   // The legal range is symmetric because we can use either adds or subs.
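           // For example, 4095 (0xfff) and 0x123000 are legal immediates, while 4097
           // (0x1001) is not, since it needs set bits both below and above bit 12.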
   1616   if (Val < 0)
   1617     Val = -Val;
   1618 
   1619   return (Val & ~0xfff) == 0 || (Val & ~0xfff000) == 0;
   1620 }
   1621 
   1622 SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS,
   1623                                         ISD::CondCode CC, SDValue &A64cc,
   1624                                         SelectionDAG &DAG, SDLoc &dl) const {
   1625   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
   1626     int64_t C = 0;
   1627     EVT VT = RHSC->getValueType(0);
   1628     bool knownInvalid = false;
   1629 
   1630     // I'm not convinced the rest of LLVM handles these edge cases properly, but
   1631     // we can at least get it right.
   1632     if (isSignedIntSetCC(CC)) {
   1633       C = RHSC->getSExtValue();
   1634     } else if (RHSC->getZExtValue() > INT64_MAX) {
   1635       // A 64-bit constant not representable by a signed 64-bit integer is far
   1636       // too big to fit into a SUBS immediate anyway.
   1637       knownInvalid = true;
   1638     } else {
   1639       C = RHSC->getZExtValue();
   1640     }
   1641 
   1642     if (!knownInvalid && !isLegalICmpImmediate(C)) {
    1643       // The constant does not fit, so try adjusting it by one.
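               // For example, "x s< 4097" uses an illegal immediate, but the equivalent
               // "x s<= 4096" is fine: 4096 is a 12-bit value shifted left by 12.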
   1644       switch (CC) {
   1645       default: break;
   1646       case ISD::SETLT:
   1647       case ISD::SETGE:
   1648         if (isLegalICmpImmediate(C-1)) {
   1649           CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
   1650           RHS = DAG.getConstant(C-1, VT);
   1651         }
   1652         break;
   1653       case ISD::SETULT:
   1654       case ISD::SETUGE:
   1655         if (isLegalICmpImmediate(C-1)) {
   1656           CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
   1657           RHS = DAG.getConstant(C-1, VT);
   1658         }
   1659         break;
   1660       case ISD::SETLE:
   1661       case ISD::SETGT:
   1662         if (isLegalICmpImmediate(C+1)) {
   1663           CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
   1664           RHS = DAG.getConstant(C+1, VT);
   1665         }
   1666         break;
   1667       case ISD::SETULE:
   1668       case ISD::SETUGT:
   1669         if (isLegalICmpImmediate(C+1)) {
   1670           CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
   1671           RHS = DAG.getConstant(C+1, VT);
   1672         }
   1673         break;
   1674       }
   1675     }
   1676   }
   1677 
   1678   A64CC::CondCodes CondCode = IntCCToA64CC(CC);
   1679   A64cc = DAG.getConstant(CondCode, MVT::i32);
   1680   return DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
   1681                      DAG.getCondCode(CC));
   1682 }
   1683 
   1684 static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC,
   1685                                     A64CC::CondCodes &Alternative) {
   1686   A64CC::CondCodes CondCode = A64CC::Invalid;
   1687   Alternative = A64CC::Invalid;
   1688 
   1689   switch (CC) {
   1690   default: llvm_unreachable("Unknown FP condition!");
   1691   case ISD::SETEQ:
   1692   case ISD::SETOEQ: CondCode = A64CC::EQ; break;
   1693   case ISD::SETGT:
   1694   case ISD::SETOGT: CondCode = A64CC::GT; break;
   1695   case ISD::SETGE:
   1696   case ISD::SETOGE: CondCode = A64CC::GE; break;
   1697   case ISD::SETOLT: CondCode = A64CC::MI; break;
   1698   case ISD::SETOLE: CondCode = A64CC::LS; break;
   1699   case ISD::SETONE: CondCode = A64CC::MI; Alternative = A64CC::GT; break;
   1700   case ISD::SETO:   CondCode = A64CC::VC; break;
   1701   case ISD::SETUO:  CondCode = A64CC::VS; break;
   1702   case ISD::SETUEQ: CondCode = A64CC::EQ; Alternative = A64CC::VS; break;
   1703   case ISD::SETUGT: CondCode = A64CC::HI; break;
   1704   case ISD::SETUGE: CondCode = A64CC::PL; break;
   1705   case ISD::SETLT:
   1706   case ISD::SETULT: CondCode = A64CC::LT; break;
   1707   case ISD::SETLE:
   1708   case ISD::SETULE: CondCode = A64CC::LE; break;
   1709   case ISD::SETNE:
   1710   case ISD::SETUNE: CondCode = A64CC::NE; break;
   1711   }
   1712   return CondCode;
   1713 }
   1714 
   1715 SDValue
   1716 AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   1717   SDLoc DL(Op);
   1718   EVT PtrVT = getPointerTy();
   1719   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   1720 
   1721   switch(getTargetMachine().getCodeModel()) {
   1722   case CodeModel::Small:
   1723     // The most efficient code is PC-relative anyway for the small memory model,
    1724     // so we don't need to worry about the relocation model.
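             // A rough sketch of the expected sequence (assuming ADRP/ADD selection):
             //     ADRP xN, <block label>
             //     ADD  xN, xN, #:lo12:<block label>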
   1725     return DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
   1726                        DAG.getTargetBlockAddress(BA, PtrVT, 0,
   1727                                                  AArch64II::MO_NO_FLAG),
   1728                        DAG.getTargetBlockAddress(BA, PtrVT, 0,
   1729                                                  AArch64II::MO_LO12),
   1730                        DAG.getConstant(/*Alignment=*/ 4, MVT::i32));
   1731   case CodeModel::Large:
   1732     return DAG.getNode(
   1733       AArch64ISD::WrapperLarge, DL, PtrVT,
   1734       DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G3),
   1735       DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
   1736       DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
   1737       DAG.getTargetBlockAddress(BA, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
   1738   default:
   1739     llvm_unreachable("Only small and large code models supported now");
   1740   }
   1741 }
   1742 
   1743 
   1744 // (BRCOND chain, val, dest)
   1745 SDValue
   1746 AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
   1747   SDLoc dl(Op);
   1748   SDValue Chain = Op.getOperand(0);
   1749   SDValue TheBit = Op.getOperand(1);
   1750   SDValue DestBB = Op.getOperand(2);
   1751 
   1752   // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
   1753   // that as the consumer we are responsible for ignoring rubbish in higher
   1754   // bits.
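           // For example, an incoming "boolean" of 0xFFFFFFFE must still be treated as
           // false: after the AND, only bit 0 is compared against zero below.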
   1755   TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
   1756                        DAG.getConstant(1, MVT::i32));
   1757 
   1758   SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
   1759                                DAG.getConstant(0, TheBit.getValueType()),
   1760                                DAG.getCondCode(ISD::SETNE));
   1761 
   1762   return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other, Chain,
   1763                      A64CMP, DAG.getConstant(A64CC::NE, MVT::i32),
   1764                      DestBB);
   1765 }
   1766 
   1767 // (BR_CC chain, condcode, lhs, rhs, dest)
   1768 SDValue
   1769 AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   1770   SDLoc dl(Op);
   1771   SDValue Chain = Op.getOperand(0);
   1772   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   1773   SDValue LHS = Op.getOperand(2);
   1774   SDValue RHS = Op.getOperand(3);
   1775   SDValue DestBB = Op.getOperand(4);
   1776 
   1777   if (LHS.getValueType() == MVT::f128) {
   1778     // f128 comparisons are lowered to runtime calls by a routine which sets
   1779     // LHS, RHS and CC appropriately for the rest of this function to continue.
   1780     softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
   1781 
   1782     // If softenSetCCOperands returned a scalar, we need to compare the result
   1783     // against zero to select between true and false values.
   1784     if (RHS.getNode() == 0) {
   1785       RHS = DAG.getConstant(0, LHS.getValueType());
   1786       CC = ISD::SETNE;
   1787     }
   1788   }
   1789 
   1790   if (LHS.getValueType().isInteger()) {
   1791     SDValue A64cc;
   1792 
   1793     // Integers are handled in a separate function because the combinations of
   1794     // immediates and tests can get hairy and we may want to fiddle things.
   1795     SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
   1796 
   1797     return DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
   1798                        Chain, CmpOp, A64cc, DestBB);
   1799   }
   1800 
   1801   // Note that some LLVM floating-point CondCodes can't be lowered to a single
    1802   // conditional branch, hence FPCCToA64CC can set a second test, and passing
    1803   // either test is sufficient.
   1804   A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
   1805   CondCode = FPCCToA64CC(CC, Alternative);
   1806   SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
   1807   SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
   1808                               DAG.getCondCode(CC));
   1809   SDValue A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
   1810                                  Chain, SetCC, A64cc, DestBB);
   1811 
   1812   if (Alternative != A64CC::Invalid) {
   1813     A64cc = DAG.getConstant(Alternative, MVT::i32);
   1814     A64BR_CC = DAG.getNode(AArch64ISD::BR_CC, dl, MVT::Other,
   1815                            A64BR_CC, SetCC, A64cc, DestBB);
   1816 
   1817   }
   1818 
   1819   return A64BR_CC;
   1820 }
   1821 
   1822 SDValue
   1823 AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG,
   1824                                        RTLIB::Libcall Call) const {
   1825   ArgListTy Args;
   1826   ArgListEntry Entry;
   1827   for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) {
   1828     EVT ArgVT = Op.getOperand(i).getValueType();
   1829     Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
   1830     Entry.Node = Op.getOperand(i); Entry.Ty = ArgTy;
   1831     Entry.isSExt = false;
   1832     Entry.isZExt = false;
   1833     Args.push_back(Entry);
   1834   }
   1835   SDValue Callee = DAG.getExternalSymbol(getLibcallName(Call), getPointerTy());
   1836 
   1837   Type *RetTy = Op.getValueType().getTypeForEVT(*DAG.getContext());
   1838 
   1839   // By default, the input chain to this libcall is the entry node of the
   1840   // function. If the libcall is going to be emitted as a tail call then
   1841   // isUsedByReturnOnly will change it to the right chain if the return
   1842   // node which is being folded has a non-entry input chain.
   1843   SDValue InChain = DAG.getEntryNode();
   1844 
    1845   // isTailCall may be true since the callee does not reference the caller's
    1846   // stack frame. Check whether the call is in the right position for that.
   1847   SDValue TCChain = InChain;
   1848   bool isTailCall = isInTailCallPosition(DAG, Op.getNode(), TCChain);
   1849   if (isTailCall)
   1850     InChain = TCChain;
   1851 
   1852   TargetLowering::
   1853   CallLoweringInfo CLI(InChain, RetTy, false, false, false, false,
   1854                     0, getLibcallCallingConv(Call), isTailCall,
   1855                     /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
   1856                     Callee, Args, DAG, SDLoc(Op));
   1857   std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
   1858 
   1859   if (!CallInfo.second.getNode())
   1860     // It's a tailcall, return the chain (which is the DAG root).
   1861     return DAG.getRoot();
   1862 
   1863   return CallInfo.first;
   1864 }
   1865 
   1866 SDValue
   1867 AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   1868   if (Op.getOperand(0).getValueType() != MVT::f128) {
   1869     // It's legal except when f128 is involved
   1870     return Op;
   1871   }
   1872 
   1873   RTLIB::Libcall LC;
   1874   LC  = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
   1875 
   1876   SDValue SrcVal = Op.getOperand(0);
   1877   return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1,
   1878                      /*isSigned*/ false, SDLoc(Op));
   1879 }
   1880 
   1881 SDValue
   1882 AArch64TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   1883   assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
   1884 
   1885   RTLIB::Libcall LC;
   1886   LC  = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
   1887 
   1888   return LowerF128ToCall(Op, DAG, LC);
   1889 }
   1890 
   1891 SDValue
   1892 AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
   1893                                       bool IsSigned) const {
   1894   if (Op.getOperand(0).getValueType() != MVT::f128) {
   1895     // It's legal except when f128 is involved
   1896     return Op;
   1897   }
   1898 
   1899   RTLIB::Libcall LC;
   1900   if (IsSigned)
   1901     LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
   1902   else
   1903     LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
   1904 
   1905   return LowerF128ToCall(Op, DAG, LC);
   1906 }
   1907 
   1908 SDValue
   1909 AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op,
   1910                                                   SelectionDAG &DAG) const {
   1911   assert(getTargetMachine().getCodeModel() == CodeModel::Large);
   1912   assert(getTargetMachine().getRelocationModel() == Reloc::Static);
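           // In this configuration the address is materialized piecewise: roughly a
           // MOVZ of the top 16 bits (G3) followed by MOVKs for the G2, G1 and G0
           // chunks, which is what the four MO_ABS_* operands below correspond to.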
   1913 
   1914   EVT PtrVT = getPointerTy();
   1915   SDLoc dl(Op);
   1916   const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
   1917   const GlobalValue *GV = GN->getGlobal();
   1918 
   1919   SDValue GlobalAddr = DAG.getNode(
   1920       AArch64ISD::WrapperLarge, dl, PtrVT,
   1921       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G3),
   1922       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G2_NC),
   1923       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G1_NC),
   1924       DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, AArch64II::MO_ABS_G0_NC));
   1925 
   1926   if (GN->getOffset() != 0)
   1927     return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
   1928                        DAG.getConstant(GN->getOffset(), PtrVT));
   1929 
   1930   return GlobalAddr;
   1931 }
   1932 
   1933 SDValue
   1934 AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op,
   1935                                                   SelectionDAG &DAG) const {
   1936   assert(getTargetMachine().getCodeModel() == CodeModel::Small);
   1937 
   1938   EVT PtrVT = getPointerTy();
   1939   SDLoc dl(Op);
   1940   const GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
   1941   const GlobalValue *GV = GN->getGlobal();
   1942   unsigned Alignment = GV->getAlignment();
   1943   Reloc::Model RelocM = getTargetMachine().getRelocationModel();
   1944   if (GV->isWeakForLinker() && GV->isDeclaration() && RelocM == Reloc::Static) {
    1945     // Weak undefined symbols can't use an ADRP/ADD pair since they should
    1946     // evaluate to zero when they remain undefined. In PIC mode the GOT can take
    1947     // care of this, but in absolute mode we use a constant pool load.
   1948     SDValue PoolAddr;
   1949     PoolAddr = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
   1950                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
   1951                                                      AArch64II::MO_NO_FLAG),
   1952                            DAG.getTargetConstantPool(GV, PtrVT, 0, 0,
   1953                                                      AArch64II::MO_LO12),
   1954                            DAG.getConstant(8, MVT::i32));
   1955     SDValue GlobalAddr = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), PoolAddr,
   1956                                      MachinePointerInfo::getConstantPool(),
   1957                                      /*isVolatile=*/ false,
   1958                                      /*isNonTemporal=*/ true,
   1959                                      /*isInvariant=*/ true, 8);
   1960     if (GN->getOffset() != 0)
   1961       return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalAddr,
   1962                          DAG.getConstant(GN->getOffset(), PtrVT));
   1963 
   1964     return GlobalAddr;
   1965   }
   1966 
   1967   if (Alignment == 0) {
   1968     const PointerType *GVPtrTy = cast<PointerType>(GV->getType());
   1969     if (GVPtrTy->getElementType()->isSized()) {
   1970       Alignment
   1971         = getDataLayout()->getABITypeAlignment(GVPtrTy->getElementType());
   1972     } else {
   1973       // Be conservative if we can't guess, not that it really matters:
   1974       // functions and labels aren't valid for loads, and the methods used to
   1975       // actually calculate an address work with any alignment.
   1976       Alignment = 1;
   1977     }
   1978   }
   1979 
   1980   unsigned char HiFixup, LoFixup;
   1981   bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM);
   1982 
   1983   if (UseGOT) {
   1984     HiFixup = AArch64II::MO_GOT;
   1985     LoFixup = AArch64II::MO_GOT_LO12;
   1986     Alignment = 8;
   1987   } else {
   1988     HiFixup = AArch64II::MO_NO_FLAG;
   1989     LoFixup = AArch64II::MO_LO12;
   1990   }
   1991 
   1992   // AArch64's small model demands the following sequence:
   1993   // ADRP x0, somewhere
   1994   // ADD x0, x0, #:lo12:somewhere ; (or LDR directly).
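           // or, when the GOT is used, roughly:
           // ADRP x0, :got:somewhere
           // LDR  x0, [x0, #:got_lo12:somewhere]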
   1995   SDValue GlobalRef = DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
   1996                                   DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
   1997                                                              HiFixup),
   1998                                   DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
   1999                                                              LoFixup),
   2000                                   DAG.getConstant(Alignment, MVT::i32));
   2001 
   2002   if (UseGOT) {
   2003     GlobalRef = DAG.getNode(AArch64ISD::GOTLoad, dl, PtrVT, DAG.getEntryNode(),
   2004                             GlobalRef);
   2005   }
   2006 
   2007   if (GN->getOffset() != 0)
   2008     return DAG.getNode(ISD::ADD, dl, PtrVT, GlobalRef,
   2009                        DAG.getConstant(GN->getOffset(), PtrVT));
   2010 
   2011   return GlobalRef;
   2012 }
   2013 
   2014 SDValue
   2015 AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op,
   2016                                              SelectionDAG &DAG) const {
   2017   // TableGen doesn't have easy access to the CodeModel or RelocationModel, so
   2018   // we make those distinctions here.
   2019 
   2020   switch (getTargetMachine().getCodeModel()) {
   2021   case CodeModel::Small:
   2022     return LowerGlobalAddressELFSmall(Op, DAG);
   2023   case CodeModel::Large:
   2024     return LowerGlobalAddressELFLarge(Op, DAG);
   2025   default:
   2026     llvm_unreachable("Only small and large code models supported now");
   2027   }
   2028 }
   2029 
   2030 SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr,
   2031                                                 SDValue DescAddr,
   2032                                                 SDLoc DL,
   2033                                                 SelectionDAG &DAG) const {
   2034   EVT PtrVT = getPointerTy();
   2035 
   2036   // The function we need to call is simply the first entry in the GOT for this
    2037   // descriptor, so load it in preparation.
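           // Together with the descriptor address formed by our callers, the sequence
           // this builds is roughly (with "var" standing in for the real symbol):
           //   ADRP x0, :tlsdesc:var
           //   LDR  x1, [x0, #:tlsdesc_lo12:var]
           //   ADD  x0, x0, #:tlsdesc_lo12:var
           //   .tlsdesccall var
           //   BLR  x1
           // leaving the TPIDR_EL0-relative offset in x0.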
   2038   SDValue Func, Chain;
   2039   Func = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
   2040                      DescAddr);
   2041 
   2042   // The function takes only one argument: the address of the descriptor itself
   2043   // in X0.
   2044   SDValue Glue;
   2045   Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, AArch64::X0, DescAddr, Glue);
   2046   Glue = Chain.getValue(1);
   2047 
    2048   // Finally, there's a special calling convention which means that the lookup
   2049   // must preserve all registers (except X0, obviously).
   2050   const TargetRegisterInfo *TRI  = getTargetMachine().getRegisterInfo();
   2051   const AArch64RegisterInfo *A64RI
   2052     = static_cast<const AArch64RegisterInfo *>(TRI);
   2053   const uint32_t *Mask = A64RI->getTLSDescCallPreservedMask();
   2054 
   2055   // We're now ready to populate the argument list, as with a normal call:
   2056   std::vector<SDValue> Ops;
   2057   Ops.push_back(Chain);
   2058   Ops.push_back(Func);
   2059   Ops.push_back(SymAddr);
   2060   Ops.push_back(DAG.getRegister(AArch64::X0, PtrVT));
   2061   Ops.push_back(DAG.getRegisterMask(Mask));
   2062   Ops.push_back(Glue);
   2063 
   2064   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   2065   Chain = DAG.getNode(AArch64ISD::TLSDESCCALL, DL, NodeTys, &Ops[0],
   2066                       Ops.size());
   2067   Glue = Chain.getValue(1);
   2068 
   2069   // After the call, the offset from TPIDR_EL0 is in X0, copy it out and pass it
   2070   // back to the generic handling code.
   2071   return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
   2072 }
   2073 
   2074 SDValue
   2075 AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
   2076                                              SelectionDAG &DAG) const {
   2077   assert(getSubtarget()->isTargetELF() &&
   2078          "TLS not implemented for non-ELF targets");
   2079   assert(getTargetMachine().getCodeModel() == CodeModel::Small
   2080          && "TLS only supported in small memory model");
   2081   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   2082 
   2083   TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
   2084 
   2085   SDValue TPOff;
   2086   EVT PtrVT = getPointerTy();
   2087   SDLoc DL(Op);
   2088   const GlobalValue *GV = GA->getGlobal();
   2089 
   2090   SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
   2091 
   2092   if (Model == TLSModel::InitialExec) {
   2093     TPOff = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
   2094                         DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
   2095                                                    AArch64II::MO_GOTTPREL),
   2096                         DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
   2097                                                    AArch64II::MO_GOTTPREL_LO12),
   2098                         DAG.getConstant(8, MVT::i32));
   2099     TPOff = DAG.getNode(AArch64ISD::GOTLoad, DL, PtrVT, DAG.getEntryNode(),
   2100                         TPOff);
   2101   } else if (Model == TLSModel::LocalExec) {
   2102     SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
   2103                                                AArch64II::MO_TPREL_G1);
   2104     SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
   2105                                                AArch64II::MO_TPREL_G0_NC);
   2106 
   2107     TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
   2108                                        DAG.getTargetConstant(1, MVT::i32)), 0);
   2109     TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
   2110                                        TPOff, LoVar,
   2111                                        DAG.getTargetConstant(0, MVT::i32)), 0);
   2112   } else if (Model == TLSModel::GeneralDynamic) {
   2113     // Accesses used in this sequence go via the TLS descriptor which lives in
   2114     // the GOT. Prepare an address we can use to handle this.
   2115     SDValue HiDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
   2116                                                 AArch64II::MO_TLSDESC);
   2117     SDValue LoDesc = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
   2118                                                 AArch64II::MO_TLSDESC_LO12);
   2119     SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
   2120                                    HiDesc, LoDesc,
   2121                                    DAG.getConstant(8, MVT::i32));
   2122     SDValue SymAddr = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0);
   2123 
   2124     TPOff = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
   2125   } else if (Model == TLSModel::LocalDynamic) {
    2126     // Local-dynamic accesses proceed in two phases: first, a general-dynamic
    2127     // TLS descriptor call against the special symbol _TLS_MODULE_BASE_
    2128     // calculates the beginning of the module's TLS region; then a DTPREL
    2129     // offset calculation locates the variable within that region.
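             // Sketching those phases: resolve _TLS_MODULE_BASE_ through a TLS
             // descriptor call, materialize the variable's DTPREL offset with a
             // MOVZ/MOVK pair, and add the two results together (see below).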
   2130 
   2131     // These accesses will need deduplicating if there's more than one.
   2132     AArch64MachineFunctionInfo* MFI = DAG.getMachineFunction()
   2133       .getInfo<AArch64MachineFunctionInfo>();
   2134     MFI->incNumLocalDynamicTLSAccesses();
   2135 
   2136 
   2137     // Get the location of _TLS_MODULE_BASE_:
   2138     SDValue HiDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
   2139                                                 AArch64II::MO_TLSDESC);
   2140     SDValue LoDesc = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
   2141                                                 AArch64II::MO_TLSDESC_LO12);
   2142     SDValue DescAddr = DAG.getNode(AArch64ISD::WrapperSmall, DL, PtrVT,
   2143                                    HiDesc, LoDesc,
   2144                                    DAG.getConstant(8, MVT::i32));
   2145     SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT);
   2146 
   2147     ThreadBase = LowerTLSDescCall(SymAddr, DescAddr, DL, DAG);
   2148 
   2149     // Get the variable's offset from _TLS_MODULE_BASE_
   2150     SDValue HiVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
   2151                                                AArch64II::MO_DTPREL_G1);
   2152     SDValue LoVar = DAG.getTargetGlobalAddress(GV, DL, MVT::i64, 0,
   2153                                                AArch64II::MO_DTPREL_G0_NC);
   2154 
   2155     TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar,
   2156                                        DAG.getTargetConstant(0, MVT::i32)), 0);
   2157     TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT,
   2158                                        TPOff, LoVar,
   2159                                        DAG.getTargetConstant(0, MVT::i32)), 0);
   2160   } else
    2161     llvm_unreachable("Unsupported TLS access model");
   2162 
   2163 
   2164   return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
   2165 }
   2166 
   2167 SDValue
   2168 AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
   2169                                       bool IsSigned) const {
   2170   if (Op.getValueType() != MVT::f128) {
   2171     // Legal for everything except f128.
   2172     return Op;
   2173   }
   2174 
   2175   RTLIB::Libcall LC;
   2176   if (IsSigned)
   2177     LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
   2178   else
   2179     LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
   2180 
   2181   return LowerF128ToCall(Op, DAG, LC);
   2182 }
   2183 
   2184 
   2185 SDValue
   2186 AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   2187   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
   2188   SDLoc dl(JT);
   2189   EVT PtrVT = getPointerTy();
   2190 
    2191   // When compiling PIC, jump tables get put in the code section, so a static
    2192   // relocation style is acceptable in both the PIC and non-PIC cases.
   2193   switch (getTargetMachine().getCodeModel()) {
   2194   case CodeModel::Small:
   2195     return DAG.getNode(AArch64ISD::WrapperSmall, dl, PtrVT,
   2196                        DAG.getTargetJumpTable(JT->getIndex(), PtrVT),
   2197                        DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
   2198                                               AArch64II::MO_LO12),
   2199                        DAG.getConstant(1, MVT::i32));
   2200   case CodeModel::Large:
   2201     return DAG.getNode(
   2202       AArch64ISD::WrapperLarge, dl, PtrVT,
   2203       DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G3),
   2204       DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G2_NC),
   2205       DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G1_NC),
   2206       DAG.getTargetJumpTable(JT->getIndex(), PtrVT, AArch64II::MO_ABS_G0_NC));
   2207   default:
   2208     llvm_unreachable("Only small and large code models supported now");
   2209   }
   2210 }
   2211 
   2212 // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode)
   2213 SDValue
   2214 AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   2215   SDLoc dl(Op);
   2216   SDValue LHS = Op.getOperand(0);
   2217   SDValue RHS = Op.getOperand(1);
   2218   SDValue IfTrue = Op.getOperand(2);
   2219   SDValue IfFalse = Op.getOperand(3);
   2220   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
   2221 
   2222   if (LHS.getValueType() == MVT::f128) {
   2223     // f128 comparisons are lowered to libcalls, but slot in nicely here
   2224     // afterwards.
   2225     softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
   2226 
   2227     // If softenSetCCOperands returned a scalar, we need to compare the result
   2228     // against zero to select between true and false values.
   2229     if (RHS.getNode() == 0) {
   2230       RHS = DAG.getConstant(0, LHS.getValueType());
   2231       CC = ISD::SETNE;
   2232     }
   2233   }
   2234 
   2235   if (LHS.getValueType().isInteger()) {
   2236     SDValue A64cc;
   2237 
   2238     // Integers are handled in a separate function because the combinations of
   2239     // immediates and tests can get hairy and we may want to fiddle things.
   2240     SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
   2241 
   2242     return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
   2243                        CmpOp, IfTrue, IfFalse, A64cc);
   2244   }
   2245 
   2246   // Note that some LLVM floating-point CondCodes can't be lowered to a single
    2247   // conditional branch, hence FPCCToA64CC can set a second test, and passing
    2248   // either test is sufficient.
   2249   A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
   2250   CondCode = FPCCToA64CC(CC, Alternative);
   2251   SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
   2252   SDValue SetCC = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
   2253                               DAG.getCondCode(CC));
   2254   SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl,
   2255                                      Op.getValueType(),
   2256                                      SetCC, IfTrue, IfFalse, A64cc);
   2257 
   2258   if (Alternative != A64CC::Invalid) {
   2259     A64cc = DAG.getConstant(Alternative, MVT::i32);
   2260     A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
   2261                                SetCC, IfTrue, A64SELECT_CC, A64cc);
   2262 
   2263   }
   2264 
   2265   return A64SELECT_CC;
   2266 }
   2267 
   2268 // (SELECT testbit, iftrue, iffalse)
   2269 SDValue
   2270 AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
   2271   SDLoc dl(Op);
   2272   SDValue TheBit = Op.getOperand(0);
   2273   SDValue IfTrue = Op.getOperand(1);
   2274   SDValue IfFalse = Op.getOperand(2);
   2275 
   2276   // AArch64 BooleanContents is the default UndefinedBooleanContent, which means
   2277   // that as the consumer we are responsible for ignoring rubbish in higher
   2278   // bits.
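           // For example, a "boolean" of 0x2 selects IfFalse: after the AND below, only
           // bit 0 decides which value the select produces.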
   2279   TheBit = DAG.getNode(ISD::AND, dl, MVT::i32, TheBit,
   2280                        DAG.getConstant(1, MVT::i32));
   2281   SDValue A64CMP = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, TheBit,
   2282                                DAG.getConstant(0, TheBit.getValueType()),
   2283                                DAG.getCondCode(ISD::SETNE));
   2284 
   2285   return DAG.getNode(AArch64ISD::SELECT_CC, dl, Op.getValueType(),
   2286                      A64CMP, IfTrue, IfFalse,
   2287                      DAG.getConstant(A64CC::NE, MVT::i32));
   2288 }
   2289 
   2290 static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) {
   2291   SDLoc DL(Op);
   2292   SDValue LHS = Op.getOperand(0);
   2293   SDValue RHS = Op.getOperand(1);
   2294   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
   2295   EVT VT = Op.getValueType();
   2296   bool Invert = false;
   2297   SDValue Op0, Op1;
   2298   unsigned Opcode;
   2299 
   2300   if (LHS.getValueType().isInteger()) {
   2301 
   2302     // Attempt to use Vector Integer Compare Mask Test instruction.
   2303     // TST = icmp ne (and (op0, op1), zero).
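             // For example, "icmp ne (and %a, %b), zeroinitializer" on <4 x i32> can
             // become a single CMTST (illustratively: cmtst v0.4s, v1.4s, v2.4s).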
   2304     if (CC == ISD::SETNE) {
   2305       if (((LHS.getOpcode() == ISD::AND) &&
   2306            ISD::isBuildVectorAllZeros(RHS.getNode())) ||
   2307           ((RHS.getOpcode() == ISD::AND) &&
   2308            ISD::isBuildVectorAllZeros(LHS.getNode()))) {
   2309 
   2310         SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS;
   2311         SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0));
   2312         SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1));
   2313         return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS);
   2314       }
   2315     }
   2316 
   2317     // Attempt to use Vector Integer Compare Mask against Zero instr (Signed).
   2318     // Note: Compare against Zero does not support unsigned predicates.
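             // For example, "icmp sgt %v, zeroinitializer" can use a compare against an
             // immediate zero (illustratively: cmgt v0.4s, v1.4s, #0) rather than
             // materializing a zero vector.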
   2319     if ((ISD::isBuildVectorAllZeros(RHS.getNode()) ||
   2320          ISD::isBuildVectorAllZeros(LHS.getNode())) &&
   2321         !isUnsignedIntSetCC(CC)) {
   2322 
   2323       // If LHS is the zero value, swap operands and CondCode.
   2324       if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
   2325         CC = getSetCCSwappedOperands(CC);
   2326         Op0 = RHS;
   2327       } else
   2328         Op0 = LHS;
   2329 
   2330       // Ensure valid CondCode for Compare Mask against Zero instruction:
   2331       // EQ, GE, GT, LE, LT.
   2332       if (ISD::SETNE == CC) {
   2333         Invert = true;
   2334         CC = ISD::SETEQ;
   2335       }
   2336 
    2337       // Use the constant's type to distinguish integer and FP compares with zero.
   2338       Op1 = DAG.getConstant(0, MVT::i32);
   2339       Opcode = AArch64ISD::NEON_CMPZ;
   2340 
   2341     } else {
   2342       // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned).
   2343       // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT.
   2344       bool Swap = false;
   2345       switch (CC) {
   2346       default:
   2347         llvm_unreachable("Illegal integer comparison.");
   2348       case ISD::SETEQ:
   2349       case ISD::SETGT:
   2350       case ISD::SETGE:
   2351       case ISD::SETUGT:
   2352       case ISD::SETUGE:
   2353         break;
   2354       case ISD::SETNE:
   2355         Invert = true;
   2356         CC = ISD::SETEQ;
   2357         break;
   2358       case ISD::SETULT:
   2359       case ISD::SETULE:
   2360       case ISD::SETLT:
   2361       case ISD::SETLE:
   2362         Swap = true;
   2363         CC = getSetCCSwappedOperands(CC);
   2364       }
   2365 
   2366       if (Swap)
   2367         std::swap(LHS, RHS);
   2368 
   2369       Opcode = AArch64ISD::NEON_CMP;
   2370       Op0 = LHS;
   2371       Op1 = RHS;
   2372     }
   2373 
   2374     // Generate Compare Mask instr or Compare Mask against Zero instr.
   2375     SDValue NeonCmp =
   2376         DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
   2377 
   2378     if (Invert)
   2379       NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
   2380 
   2381     return NeonCmp;
   2382   }
   2383 
   2384   // Now handle Floating Point cases.
   2385   // Attempt to use Vector Floating Point Compare Mask against Zero instruction.
   2386   if (ISD::isBuildVectorAllZeros(RHS.getNode()) ||
   2387       ISD::isBuildVectorAllZeros(LHS.getNode())) {
   2388 
   2389     // If LHS is the zero value, swap operands and CondCode.
   2390     if (ISD::isBuildVectorAllZeros(LHS.getNode())) {
   2391       CC = getSetCCSwappedOperands(CC);
   2392       Op0 = RHS;
   2393     } else
   2394       Op0 = LHS;
   2395 
    2396     // Use the constant's type to distinguish integer and FP compares with zero.
   2397     Op1 = DAG.getConstantFP(0, MVT::f32);
   2398     Opcode = AArch64ISD::NEON_CMPZ;
   2399   } else {
   2400     // Attempt to use Vector Floating Point Compare Mask instruction.
   2401     Op0 = LHS;
   2402     Op1 = RHS;
   2403     Opcode = AArch64ISD::NEON_CMP;
   2404   }
   2405 
   2406   SDValue NeonCmpAlt;
   2407   // Some register compares have to be implemented with swapped CC and operands,
   2408   // e.g.: OLT implemented as OGT with swapped operands.
   2409   bool SwapIfRegArgs = false;
   2410 
   2411   // Ensure valid CondCode for FP Compare Mask against Zero instruction:
   2412   // EQ, GE, GT, LE, LT.
   2413   // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT.
   2414   switch (CC) {
   2415   default:
   2416     llvm_unreachable("Illegal FP comparison");
   2417   case ISD::SETUNE:
   2418   case ISD::SETNE:
   2419     Invert = true; // Fallthrough
   2420   case ISD::SETOEQ:
   2421   case ISD::SETEQ:
   2422     CC = ISD::SETEQ;
   2423     break;
   2424   case ISD::SETOLT:
   2425   case ISD::SETLT:
   2426     CC = ISD::SETLT;
   2427     SwapIfRegArgs = true;
   2428     break;
   2429   case ISD::SETOGT:
   2430   case ISD::SETGT:
   2431     CC = ISD::SETGT;
   2432     break;
   2433   case ISD::SETOLE:
   2434   case ISD::SETLE:
   2435     CC = ISD::SETLE;
   2436     SwapIfRegArgs = true;
   2437     break;
   2438   case ISD::SETOGE:
   2439   case ISD::SETGE:
   2440     CC = ISD::SETGE;
   2441     break;
   2442   case ISD::SETUGE:
   2443     Invert = true;
   2444     CC = ISD::SETLT;
   2445     SwapIfRegArgs = true;
   2446     break;
   2447   case ISD::SETULE:
   2448     Invert = true;
   2449     CC = ISD::SETGT;
   2450     break;
   2451   case ISD::SETUGT:
   2452     Invert = true;
   2453     CC = ISD::SETLE;
   2454     SwapIfRegArgs = true;
   2455     break;
   2456   case ISD::SETULT:
   2457     Invert = true;
   2458     CC = ISD::SETGE;
   2459     break;
   2460   case ISD::SETUEQ:
   2461     Invert = true; // Fallthrough
   2462   case ISD::SETONE:
    2463     // Expand this to (OGT | OLT).
   2464     NeonCmpAlt =
   2465         DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT));
   2466     CC = ISD::SETLT;
   2467     SwapIfRegArgs = true;
   2468     break;
   2469   case ISD::SETUO:
   2470     Invert = true; // Fallthrough
   2471   case ISD::SETO:
   2472     // Expand this to (OGE | OLT).
   2473     NeonCmpAlt =
   2474         DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE));
   2475     CC = ISD::SETLT;
   2476     SwapIfRegArgs = true;
   2477     break;
   2478   }
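           // For example, SETUEQ falls through to the SETONE expansion above: it
           // leaves Invert set and NeonCmpAlt = (Op0 > Op1), so the code below
           // builds NOT((Op0 > Op1) | (Op0 < Op1)), which is true exactly when the
           // operands are equal or unordered.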
   2479 
   2480   if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) {
   2481     CC = getSetCCSwappedOperands(CC);
   2482     std::swap(Op0, Op1);
   2483   }
   2484 
   2485   // Generate FP Compare Mask instr or FP Compare Mask against Zero instr
   2486   SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC));
   2487 
   2488   if (NeonCmpAlt.getNode())
   2489     NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt);
   2490 
   2491   if (Invert)
   2492     NeonCmp = DAG.getNOT(DL, NeonCmp, VT);
   2493 
   2494   return NeonCmp;
   2495 }
   2496 
   2497 // (SETCC lhs, rhs, condcode)
   2498 SDValue
   2499 AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   2500   SDLoc dl(Op);
   2501   SDValue LHS = Op.getOperand(0);
   2502   SDValue RHS = Op.getOperand(1);
   2503   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
   2504   EVT VT = Op.getValueType();
   2505 
   2506   if (VT.isVector())
   2507     return LowerVectorSETCC(Op, DAG);
   2508 
   2509   if (LHS.getValueType() == MVT::f128) {
   2510     // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS
   2511     // for the rest of the function (some i32 or i64 values).
   2512     softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
   2513 
   2514     // If softenSetCCOperands returned a scalar, use it.
   2515     if (RHS.getNode() == 0) {
   2516       assert(LHS.getValueType() == Op.getValueType() &&
   2517              "Unexpected setcc expansion!");
   2518       return LHS;
   2519     }
   2520   }
   2521 
   2522   if (LHS.getValueType().isInteger()) {
   2523     SDValue A64cc;
   2524 
   2525     // Integers are handled in a separate function because the combinations of
   2526     // immediates and tests can get hairy and we may want to fiddle things.
   2527     SDValue CmpOp = getSelectableIntSetCC(LHS, RHS, CC, A64cc, DAG, dl);
   2528 
   2529     return DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
   2530                        CmpOp, DAG.getConstant(1, VT), DAG.getConstant(0, VT),
   2531                        A64cc);
   2532   }
   2533 
    2534   // Note that some LLVM floating-point CondCodes can't be represented by a
    2535   // single A64 condition code, so FPCCToA64CC can set a second, alternative
    2536   // condition where either one passing is sufficient.
   2537   A64CC::CondCodes CondCode, Alternative = A64CC::Invalid;
   2538   CondCode = FPCCToA64CC(CC, Alternative);
   2539   SDValue A64cc = DAG.getConstant(CondCode, MVT::i32);
   2540   SDValue CmpOp = DAG.getNode(AArch64ISD::SETCC, dl, MVT::i32, LHS, RHS,
   2541                               DAG.getCondCode(CC));
   2542   SDValue A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT,
   2543                                      CmpOp, DAG.getConstant(1, VT),
   2544                                      DAG.getConstant(0, VT), A64cc);
   2545 
   2546   if (Alternative != A64CC::Invalid) {
   2547     A64cc = DAG.getConstant(Alternative, MVT::i32);
   2548     A64SELECT_CC = DAG.getNode(AArch64ISD::SELECT_CC, dl, VT, CmpOp,
   2549                                DAG.getConstant(1, VT), A64SELECT_CC, A64cc);
   2550   }
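           // When an Alternative condition is needed (e.g. for SETONE), the second
           // SELECT_CC above uses the first one as its "false" value, so the final
           // result is 1 if either A64 condition passes and 0 otherwise.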
   2551 
   2552   return A64SELECT_CC;
   2553 }
   2554 
   2555 SDValue
   2556 AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
   2557   const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
    2558   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
   2559 
   2560   // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes
   2561   // rather than just 8.
   2562   return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op),
   2563                        Op.getOperand(1), Op.getOperand(2),
   2564                        DAG.getConstant(32, MVT::i32), 8, false, false,
   2565                        MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
   2566 }
   2567 
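         // For reference, the va_list object initialized below has the layout given
         // in the AArch64 Procedure Call Standard, section B.3 (field names as used
         // in that document), which is also why LowerVACOPY copies 32 bytes:
         //
         //   struct va_list {
         //     void *__stack;   // offset 0:  next stacked argument
         //     void *__gr_top;  // offset 8:  top of the GP register save area
         //     void *__vr_top;  // offset 16: top of the FP/SIMD register save area
         //     int   __gr_offs; // offset 24: negative offset from __gr_top
         //     int   __vr_offs; // offset 28: negative offset from __vr_top
         //   };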
   2568 SDValue
   2569 AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
   2570   // The layout of the va_list struct is specified in the AArch64 Procedure Call
   2571   // Standard, section B.3.
   2572   MachineFunction &MF = DAG.getMachineFunction();
   2573   AArch64MachineFunctionInfo *FuncInfo
   2574     = MF.getInfo<AArch64MachineFunctionInfo>();
   2575   SDLoc DL(Op);
   2576 
   2577   SDValue Chain = Op.getOperand(0);
   2578   SDValue VAList = Op.getOperand(1);
   2579   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
   2580   SmallVector<SDValue, 4> MemOps;
   2581 
   2582   // void *__stack at offset 0
   2583   SDValue Stack = DAG.getFrameIndex(FuncInfo->getVariadicStackIdx(),
   2584                                     getPointerTy());
   2585   MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
   2586                                 MachinePointerInfo(SV), false, false, 0));
   2587 
   2588   // void *__gr_top at offset 8
   2589   int GPRSize = FuncInfo->getVariadicGPRSize();
   2590   if (GPRSize > 0) {
   2591     SDValue GRTop, GRTopAddr;
   2592 
   2593     GRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
   2594                             DAG.getConstant(8, getPointerTy()));
   2595 
   2596     GRTop = DAG.getFrameIndex(FuncInfo->getVariadicGPRIdx(), getPointerTy());
   2597     GRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), GRTop,
   2598                         DAG.getConstant(GPRSize, getPointerTy()));
   2599 
   2600     MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
   2601                                   MachinePointerInfo(SV, 8),
   2602                                   false, false, 0));
   2603   }
   2604 
   2605   // void *__vr_top at offset 16
   2606   int FPRSize = FuncInfo->getVariadicFPRSize();
   2607   if (FPRSize > 0) {
   2608     SDValue VRTop, VRTopAddr;
   2609     VRTopAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
   2610                             DAG.getConstant(16, getPointerTy()));
   2611 
   2612     VRTop = DAG.getFrameIndex(FuncInfo->getVariadicFPRIdx(), getPointerTy());
   2613     VRTop = DAG.getNode(ISD::ADD, DL, getPointerTy(), VRTop,
   2614                         DAG.getConstant(FPRSize, getPointerTy()));
   2615 
   2616     MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
   2617                                   MachinePointerInfo(SV, 16),
   2618                                   false, false, 0));
   2619   }
   2620 
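           // Both offset fields start out negative (minus the size in bytes of the
           // corresponding register save area) and are stepped towards zero as
           // va_arg consumes saved registers; once an offset reaches zero, further
           // arguments are read from __stack instead.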
   2621   // int __gr_offs at offset 24
   2622   SDValue GROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
   2623                                    DAG.getConstant(24, getPointerTy()));
   2624   MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-GPRSize, MVT::i32),
   2625                                 GROffsAddr, MachinePointerInfo(SV, 24),
   2626                                 false, false, 0));
   2627 
   2628   // int __vr_offs at offset 28
   2629   SDValue VROffsAddr = DAG.getNode(ISD::ADD, DL, getPointerTy(), VAList,
   2630                                    DAG.getConstant(28, getPointerTy()));
   2631   MemOps.push_back(DAG.getStore(Chain, DL, DAG.getConstant(-FPRSize, MVT::i32),
   2632                                 VROffsAddr, MachinePointerInfo(SV, 28),
   2633                                 false, false, 0));
   2634 
   2635   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0],
   2636                      MemOps.size());
   2637 }
   2638 
   2639 SDValue
   2640 AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   2641   switch (Op.getOpcode()) {
   2642   default: llvm_unreachable("Don't know how to custom lower this!");
   2643   case ISD::FADD: return LowerF128ToCall(Op, DAG, RTLIB::ADD_F128);
   2644   case ISD::FSUB: return LowerF128ToCall(Op, DAG, RTLIB::SUB_F128);
   2645   case ISD::FMUL: return LowerF128ToCall(Op, DAG, RTLIB::MUL_F128);
   2646   case ISD::FDIV: return LowerF128ToCall(Op, DAG, RTLIB::DIV_F128);
   2647   case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, true);
   2648   case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG, false);
   2649   case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG, true);
   2650   case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false);
   2651   case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
   2652   case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
   2653 
   2654   case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
   2655   case ISD::BRCOND: return LowerBRCOND(Op, DAG);
   2656   case ISD::BR_CC: return LowerBR_CC(Op, DAG);
   2657   case ISD::GlobalAddress: return LowerGlobalAddressELF(Op, DAG);
   2658   case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
   2659   case ISD::JumpTable: return LowerJumpTable(Op, DAG);
   2660   case ISD::SELECT: return LowerSELECT(Op, DAG);
   2661   case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
   2662   case ISD::SETCC: return LowerSETCC(Op, DAG);
   2663   case ISD::VACOPY: return LowerVACOPY(Op, DAG);
   2664   case ISD::VASTART: return LowerVASTART(Op, DAG);
   2665   case ISD::BUILD_VECTOR:
   2666     return LowerBUILD_VECTOR(Op, DAG, getSubtarget());
   2667   }
   2668 
   2669   return SDValue();
   2670 }
   2671 
   2672 /// Check if the specified splat value corresponds to a valid vector constant
   2673 /// for a Neon instruction with a "modified immediate" operand (e.g., MOVI).  If
    2674 /// so, return the encoded 8-bit immediate and the OpCmode instruction field
    2675 /// values.
   2676 static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
   2677                               unsigned SplatBitSize, SelectionDAG &DAG,
   2678                               bool is128Bits, NeonModImmType type, EVT &VT,
   2679                               unsigned &Imm, unsigned &OpCmode) {
   2680   switch (SplatBitSize) {
   2681   default:
   2682     llvm_unreachable("unexpected size for isNeonModifiedImm");
   2683   case 8: {
   2684     if (type != Neon_Mov_Imm)
   2685       return false;
   2686     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
   2687     // Neon movi per byte: Op=0, Cmode=1110.
   2688     OpCmode = 0xe;
   2689     Imm = SplatBits;
   2690     VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
   2691     break;
   2692   }
   2693   case 16: {
   2694     // Neon move inst per halfword
   2695     VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
   2696     if ((SplatBits & ~0xff) == 0) {
   2697       // Value = 0x00nn is 0x00nn LSL 0
   2698       // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000
   2699       // bic:  Op=1, Cmode=1001;  orr:  Op=0, Cmode=1001
   2700       // Op=x, Cmode=100y
   2701       Imm = SplatBits;
   2702       OpCmode = 0x8;
   2703       break;
   2704     }
   2705     if ((SplatBits & ~0xff00) == 0) {
   2706       // Value = 0xnn00 is 0x00nn LSL 8
   2707       // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010
   2708       // bic:  Op=1, Cmode=1011;  orr:  Op=0, Cmode=1011
   2709       // Op=x, Cmode=101x
   2710       Imm = SplatBits >> 8;
   2711       OpCmode = 0xa;
   2712       break;
   2713     }
   2714     // can't handle any other
   2715     return false;
   2716   }
   2717 
   2718   case 32: {
    2719     // First the LSL variants (MSL is not usable by some of the consuming instructions).
   2720 
   2721     // Neon move instr per word, shift zeros
   2722     VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
   2723     if ((SplatBits & ~0xff) == 0) {
   2724       // Value = 0x000000nn is 0x000000nn LSL 0
   2725       // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000
   2726       // bic:  Op=1, Cmode= 0001; orr:  Op=0, Cmode= 0001
   2727       // Op=x, Cmode=000x
   2728       Imm = SplatBits;
   2729       OpCmode = 0;
   2730       break;
   2731     }
   2732     if ((SplatBits & ~0xff00) == 0) {
   2733       // Value = 0x0000nn00 is 0x000000nn LSL 8
   2734       // movi: Op=0, Cmode= 0010;  mvni: Op=1, Cmode= 0010
   2735       // bic:  Op=1, Cmode= 0011;  orr : Op=0, Cmode= 0011
   2736       // Op=x, Cmode=001x
   2737       Imm = SplatBits >> 8;
   2738       OpCmode = 0x2;
   2739       break;
   2740     }
   2741     if ((SplatBits & ~0xff0000) == 0) {
   2742       // Value = 0x00nn0000 is 0x000000nn LSL 16
   2743       // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100
   2744       // bic:  Op=1, Cmode= 0101; orr:  Op=0, Cmode= 0101
   2745       // Op=x, Cmode=010x
   2746       Imm = SplatBits >> 16;
   2747       OpCmode = 0x4;
   2748       break;
   2749     }
   2750     if ((SplatBits & ~0xff000000) == 0) {
   2751       // Value = 0xnn000000 is 0x000000nn LSL 24
   2752       // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110
   2753       // bic:  Op=1, Cmode= 0111; orr:  Op=0, Cmode= 0111
   2754       // Op=x, Cmode=011x
   2755       Imm = SplatBits >> 24;
   2756       OpCmode = 0x6;
   2757       break;
   2758     }
   2759 
   2760     // Now the MSL immediates.
   2761 
   2762     // Neon move instr per word, shift ones
   2763     if ((SplatBits & ~0xffff) == 0 &&
   2764         ((SplatBits | SplatUndef) & 0xff) == 0xff) {
   2765       // Value = 0x0000nnff is 0x000000nn MSL 8
   2766       // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100
   2767       // Op=x, Cmode=1100
   2768       Imm = SplatBits >> 8;
   2769       OpCmode = 0xc;
   2770       break;
   2771     }
   2772     if ((SplatBits & ~0xffffff) == 0 &&
   2773         ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
   2774       // Value = 0x00nnffff is 0x000000nn MSL 16
    2775       // movi: Op=0, Cmode= 1101; mvni: Op=1, Cmode= 1101
   2776       // Op=x, Cmode=1101
   2777       Imm = SplatBits >> 16;
   2778       OpCmode = 0xd;
   2779       break;
   2780     }
   2781     // can't handle any other
   2782     return false;
   2783   }
   2784 
   2785   case 64: {
   2786     if (type != Neon_Mov_Imm)
   2787       return false;
   2788     // Neon move instr bytemask, where each byte is either 0x00 or 0xff.
   2789     // movi Op=1, Cmode=1110.
   2790     OpCmode = 0x1e;
   2791     uint64_t BitMask = 0xff;
   2792     uint64_t Val = 0;
   2793     unsigned ImmMask = 1;
   2794     Imm = 0;
   2795     for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
   2796       if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
   2797         Val |= BitMask;
   2798         Imm |= ImmMask;
   2799       } else if ((SplatBits & BitMask) != 0) {
   2800         return false;
   2801       }
   2802       BitMask <<= 8;
   2803       ImmMask <<= 1;
   2804     }
   2805     SplatBits = Val;
   2806     VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
   2807     break;
   2808   }
   2809   }
   2810 
   2811   return true;
   2812 }
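         // A worked example of the helper above: a 16-bit splat of 0xAB00 with no
         // undef bits hits the (SplatBits & ~0xff00) == 0 case, so it is reported as
         // Imm = 0xAB, OpCmode = 0xa ("0x00nn LSL 8"), with VT = v4i16 or v8i16
         // depending on is128Bits.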
   2813 
   2814 static SDValue PerformANDCombine(SDNode *N,
   2815                                  TargetLowering::DAGCombinerInfo &DCI) {
   2816 
   2817   SelectionDAG &DAG = DCI.DAG;
   2818   SDLoc DL(N);
   2819   EVT VT = N->getValueType(0);
   2820 
    2821   // We're looking for an AND/SRL pair which forms a UBFX.
   2822 
   2823   if (VT != MVT::i32 && VT != MVT::i64)
   2824     return SDValue();
   2825 
   2826   if (!isa<ConstantSDNode>(N->getOperand(1)))
   2827     return SDValue();
   2828 
   2829   uint64_t TruncMask = N->getConstantOperandVal(1);
   2830   if (!isMask_64(TruncMask))
   2831     return SDValue();
   2832 
   2833   uint64_t Width = CountPopulation_64(TruncMask);
   2834   SDValue Shift = N->getOperand(0);
   2835 
   2836   if (Shift.getOpcode() != ISD::SRL)
   2837     return SDValue();
   2838 
   2839   if (!isa<ConstantSDNode>(Shift->getOperand(1)))
   2840     return SDValue();
   2841   uint64_t LSB = Shift->getConstantOperandVal(1);
   2842 
   2843   if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
   2844     return SDValue();
   2845 
   2846   return DAG.getNode(AArch64ISD::UBFX, DL, VT, Shift.getOperand(0),
   2847                      DAG.getConstant(LSB, MVT::i64),
   2848                      DAG.getConstant(LSB + Width - 1, MVT::i64));
   2849 }
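         // An illustrative example of the combine above: (and (srl X, 3), 0x1f) has
         // a 5-bit mask, so it becomes (UBFX X, #3, #7), i.e. LSB = 3 and
         // MSB = LSB + Width - 1 = 7.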
   2850 
   2851 /// For a true bitfield insert, the bits getting into that contiguous mask
   2852 /// should come from the low part of an existing value: they must be formed from
   2853 /// a compatible SHL operation (unless they're already low). This function
    2854 /// checks that condition and returns the intended least-significant bit. If
    2855 /// the operation is not a field preparation, -1 is returned.
   2856 static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT,
   2857                             SDValue &MaskedVal, uint64_t Mask) {
   2858   if (!isShiftedMask_64(Mask))
   2859     return -1;
   2860 
   2861   // Now we need to alter MaskedVal so that it is an appropriate input for a BFI
   2862   // instruction. BFI will do a left-shift by LSB before applying the mask we've
   2863   // spotted, so in general we should pre-emptively "undo" that by making sure
   2864   // the incoming bits have had a right-shift applied to them.
   2865   //
   2866   // This right shift, however, will combine with existing left/right shifts. In
   2867   // the simplest case of a completely straight bitfield operation, it will be
   2868   // expected to completely cancel out with an existing SHL. More complicated
   2869   // cases (e.g. bitfield to bitfield copy) may still need a real shift before
   2870   // the BFI.
   2871 
   2872   uint64_t LSB = countTrailingZeros(Mask);
   2873   int64_t ShiftRightRequired = LSB;
   2874   if (MaskedVal.getOpcode() == ISD::SHL &&
   2875       isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
   2876     ShiftRightRequired -= MaskedVal.getConstantOperandVal(1);
   2877     MaskedVal = MaskedVal.getOperand(0);
   2878   } else if (MaskedVal.getOpcode() == ISD::SRL &&
   2879              isa<ConstantSDNode>(MaskedVal.getOperand(1))) {
   2880     ShiftRightRequired += MaskedVal.getConstantOperandVal(1);
   2881     MaskedVal = MaskedVal.getOperand(0);
   2882   }
   2883 
   2884   if (ShiftRightRequired > 0)
   2885     MaskedVal = DAG.getNode(ISD::SRL, DL, VT, MaskedVal,
   2886                             DAG.getConstant(ShiftRightRequired, MVT::i64));
   2887   else if (ShiftRightRequired < 0) {
   2888     // We could actually end up with a residual left shift, for example with
    2889     // "struct.bitfield = val << 1".
   2890     MaskedVal = DAG.getNode(ISD::SHL, DL, VT, MaskedVal,
   2891                             DAG.getConstant(-ShiftRightRequired, MVT::i64));
   2892   }
   2893 
   2894   return LSB;
   2895 }
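         // For example, with Mask = 0x0000ff00 and MaskedVal = (shl Y, 8), the LSB
         // is 8 and the required right-shift cancels the existing left-shift, so
         // MaskedVal is rewritten to plain Y and 8 is returned.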
   2896 
   2897 /// Searches from N for an existing AArch64ISD::BFI node, possibly surrounded by
   2898 /// a mask and an extension. Returns true if a BFI was found and provides
   2899 /// information on its surroundings.
   2900 static bool findMaskedBFI(SDValue N, SDValue &BFI, uint64_t &Mask,
   2901                           bool &Extended) {
   2902   Extended = false;
   2903   if (N.getOpcode() == ISD::ZERO_EXTEND) {
   2904     Extended = true;
   2905     N = N.getOperand(0);
   2906   }
   2907 
   2908   if (N.getOpcode() == ISD::AND && isa<ConstantSDNode>(N.getOperand(1))) {
   2909     Mask = N->getConstantOperandVal(1);
   2910     N = N.getOperand(0);
   2911   } else {
   2912     // Mask is the whole width.
   2913     Mask = -1ULL >> (64 - N.getValueType().getSizeInBits());
   2914   }
   2915 
   2916   if (N.getOpcode() == AArch64ISD::BFI) {
   2917     BFI = N;
   2918     return true;
   2919   }
   2920 
   2921   return false;
   2922 }
   2923 
   2924 /// Try to combine a subtree (rooted at an OR) into a "masked BFI" node, which
   2925 /// is roughly equivalent to (and (BFI ...), mask). This form is used because it
   2926 /// can often be further combined with a larger mask. Ultimately, we want mask
   2927 /// to be 2^32-1 or 2^64-1 so the AND can be skipped.
   2928 static SDValue tryCombineToBFI(SDNode *N,
   2929                                TargetLowering::DAGCombinerInfo &DCI,
   2930                                const AArch64Subtarget *Subtarget) {
   2931   SelectionDAG &DAG = DCI.DAG;
   2932   SDLoc DL(N);
   2933   EVT VT = N->getValueType(0);
   2934 
   2935   assert(N->getOpcode() == ISD::OR && "Unexpected root");
   2936 
   2937   // We need the LHS to be (and SOMETHING, MASK). Find out what that mask is or
   2938   // abandon the effort.
   2939   SDValue LHS = N->getOperand(0);
   2940   if (LHS.getOpcode() != ISD::AND)
   2941     return SDValue();
   2942 
   2943   uint64_t LHSMask;
   2944   if (isa<ConstantSDNode>(LHS.getOperand(1)))
   2945     LHSMask = LHS->getConstantOperandVal(1);
   2946   else
   2947     return SDValue();
   2948 
   2949   // We also need the RHS to be (and SOMETHING, MASK). Find out what that mask
   2950   // is or abandon the effort.
   2951   SDValue RHS = N->getOperand(1);
   2952   if (RHS.getOpcode() != ISD::AND)
   2953     return SDValue();
   2954 
   2955   uint64_t RHSMask;
   2956   if (isa<ConstantSDNode>(RHS.getOperand(1)))
   2957     RHSMask = RHS->getConstantOperandVal(1);
   2958   else
   2959     return SDValue();
   2960 
   2961   // Can't do anything if the masks are incompatible.
   2962   if (LHSMask & RHSMask)
   2963     return SDValue();
   2964 
   2965   // Now we need one of the masks to be a contiguous field. Without loss of
   2966   // generality that should be the RHS one.
   2967   SDValue Bitfield = LHS.getOperand(0);
   2968   if (getLSBForBFI(DAG, DL, VT, Bitfield, LHSMask) != -1) {
   2969     // We know that LHS is a candidate new value, and RHS isn't already a better
   2970     // one.
   2971     std::swap(LHS, RHS);
   2972     std::swap(LHSMask, RHSMask);
   2973   }
   2974 
   2975   // We've done our best to put the right operands in the right places, all we
   2976   // can do now is check whether a BFI exists.
   2977   Bitfield = RHS.getOperand(0);
   2978   int32_t LSB = getLSBForBFI(DAG, DL, VT, Bitfield, RHSMask);
   2979   if (LSB == -1)
   2980     return SDValue();
   2981 
   2982   uint32_t Width = CountPopulation_64(RHSMask);
   2983   assert(Width && "Expected non-zero bitfield width");
   2984 
   2985   SDValue BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
   2986                             LHS.getOperand(0), Bitfield,
   2987                             DAG.getConstant(LSB, MVT::i64),
   2988                             DAG.getConstant(Width, MVT::i64));
   2989 
   2990   // Mask is trivial
   2991   if ((LHSMask | RHSMask) == (-1ULL >> (64 - VT.getSizeInBits())))
   2992     return BFI;
   2993 
   2994   return DAG.getNode(ISD::AND, DL, VT, BFI,
   2995                      DAG.getConstant(LHSMask | RHSMask, VT));
   2996 }
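         // An illustrative i32 example of the combine above:
         //   (or (and X, 0xffff0000), (and Y, 0x0000ffff))
         // becomes a single BFI that inserts one operand's 16-bit field into the
         // other, and because the two masks together cover all 32 bits the trailing
         // AND is dropped entirely.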
   2997 
   2998 /// Search for the bitwise combining (with careful masks) of a MaskedBFI and its
   2999 /// original input. This is surprisingly common because SROA splits things up
   3000 /// into i8 chunks, so the originally detected MaskedBFI may actually only act
    3001 /// on the low (say) byte of a word. This is then ORed into the rest of the
   3002 /// word afterwards.
   3003 ///
   3004 /// Basic input: (or (and OLDFIELD, MASK1), (MaskedBFI MASK2, OLDFIELD, ...)).
   3005 ///
   3006 /// If MASK1 and MASK2 are compatible, we can fold the whole thing into the
   3007 /// MaskedBFI. We can also deal with a certain amount of extend/truncate being
   3008 /// involved.
   3009 static SDValue tryCombineToLargerBFI(SDNode *N,
   3010                                      TargetLowering::DAGCombinerInfo &DCI,
   3011                                      const AArch64Subtarget *Subtarget) {
   3012   SelectionDAG &DAG = DCI.DAG;
   3013   SDLoc DL(N);
   3014   EVT VT = N->getValueType(0);
   3015 
   3016   // First job is to hunt for a MaskedBFI on either the left or right. Swap
   3017   // operands if it's actually on the right.
   3018   SDValue BFI;
   3019   SDValue PossExtraMask;
   3020   uint64_t ExistingMask = 0;
   3021   bool Extended = false;
   3022   if (findMaskedBFI(N->getOperand(0), BFI, ExistingMask, Extended))
   3023     PossExtraMask = N->getOperand(1);
   3024   else if (findMaskedBFI(N->getOperand(1), BFI, ExistingMask, Extended))
   3025     PossExtraMask = N->getOperand(0);
   3026   else
   3027     return SDValue();
   3028 
   3029   // We can only combine a BFI with another compatible mask.
   3030   if (PossExtraMask.getOpcode() != ISD::AND ||
   3031       !isa<ConstantSDNode>(PossExtraMask.getOperand(1)))
   3032     return SDValue();
   3033 
   3034   uint64_t ExtraMask = PossExtraMask->getConstantOperandVal(1);
   3035 
   3036   // Masks must be compatible.
   3037   if (ExtraMask & ExistingMask)
   3038     return SDValue();
   3039 
   3040   SDValue OldBFIVal = BFI.getOperand(0);
   3041   SDValue NewBFIVal = BFI.getOperand(1);
   3042   if (Extended) {
   3043     // We skipped a ZERO_EXTEND above, so the input to the MaskedBFIs should be
   3044     // 32-bit and we'll be forming a 64-bit MaskedBFI. The MaskedBFI arguments
   3045     // need to be made compatible.
   3046     assert(VT == MVT::i64 && BFI.getValueType() == MVT::i32
   3047            && "Invalid types for BFI");
   3048     OldBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, OldBFIVal);
   3049     NewBFIVal = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NewBFIVal);
   3050   }
   3051 
   3052   // We need the MaskedBFI to be combined with a mask of the *same* value.
   3053   if (PossExtraMask.getOperand(0) != OldBFIVal)
   3054     return SDValue();
   3055 
   3056   BFI = DAG.getNode(AArch64ISD::BFI, DL, VT,
   3057                     OldBFIVal, NewBFIVal,
   3058                     BFI.getOperand(2), BFI.getOperand(3));
   3059 
   3060   // If the masking is trivial, we don't need to create it.
   3061   if ((ExtraMask | ExistingMask) == (-1ULL >> (64 - VT.getSizeInBits())))
   3062     return BFI;
   3063 
   3064   return DAG.getNode(ISD::AND, DL, VT, BFI,
   3065                      DAG.getConstant(ExtraMask | ExistingMask, VT));
   3066 }
   3067 
   3068 /// An EXTR instruction is made up of two shifts, ORed together. This helper
   3069 /// searches for and classifies those shifts.
   3070 static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount,
   3071                          bool &FromHi) {
   3072   if (N.getOpcode() == ISD::SHL)
   3073     FromHi = false;
   3074   else if (N.getOpcode() == ISD::SRL)
   3075     FromHi = true;
   3076   else
   3077     return false;
   3078 
   3079   if (!isa<ConstantSDNode>(N.getOperand(1)))
   3080     return false;
   3081 
   3082   ShiftAmount = N->getConstantOperandVal(1);
   3083   Src = N->getOperand(0);
   3084   return true;
   3085 }
   3086 
    3087 /// An EXTR instruction extracts a contiguous chunk of bits from two existing
   3088 /// registers viewed as a high/low pair. This function looks for the pattern:
   3089 /// (or (shl VAL1, #N), (srl VAL2, #RegWidth-N)) and replaces it with an
   3090 /// EXTR. Can't quite be done in TableGen because the two immediates aren't
   3091 /// independent.
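         /// For example, on i32, (or (shl A, #16), (srl B, #16)) becomes
         /// (EXTR A, B, #16); the combine only fires when the two shift amounts sum
         /// to the register width.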
   3092 static SDValue tryCombineToEXTR(SDNode *N,
   3093                                 TargetLowering::DAGCombinerInfo &DCI) {
   3094   SelectionDAG &DAG = DCI.DAG;
   3095   SDLoc DL(N);
   3096   EVT VT = N->getValueType(0);
   3097 
   3098   assert(N->getOpcode() == ISD::OR && "Unexpected root");
   3099 
   3100   if (VT != MVT::i32 && VT != MVT::i64)
   3101     return SDValue();
   3102 
   3103   SDValue LHS;
   3104   uint32_t ShiftLHS = 0;
    3105   bool LHSFromHi = false;
   3106   if (!findEXTRHalf(N->getOperand(0), LHS, ShiftLHS, LHSFromHi))
   3107     return SDValue();
   3108 
   3109   SDValue RHS;
   3110   uint32_t ShiftRHS = 0;
    3111   bool RHSFromHi = false;
   3112   if (!findEXTRHalf(N->getOperand(1), RHS, ShiftRHS, RHSFromHi))
   3113     return SDValue();
   3114 
   3115   // If they're both trying to come from the high part of the register, they're
   3116   // not really an EXTR.
   3117   if (LHSFromHi == RHSFromHi)
   3118     return SDValue();
   3119 
   3120   if (ShiftLHS + ShiftRHS != VT.getSizeInBits())
   3121     return SDValue();
   3122 
   3123   if (LHSFromHi) {
   3124     std::swap(LHS, RHS);
   3125     std::swap(ShiftLHS, ShiftRHS);
   3126   }
   3127 
   3128   return DAG.getNode(AArch64ISD::EXTR, DL, VT,
   3129                      LHS, RHS,
   3130                      DAG.getConstant(ShiftRHS, MVT::i64));
   3131 }
   3132 
   3133 /// Target-specific dag combine xforms for ISD::OR
   3134 static SDValue PerformORCombine(SDNode *N,
   3135                                 TargetLowering::DAGCombinerInfo &DCI,
   3136                                 const AArch64Subtarget *Subtarget) {
   3137 
   3138   SelectionDAG &DAG = DCI.DAG;
   3139   SDLoc DL(N);
   3140   EVT VT = N->getValueType(0);
   3141 
    3142   if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
   3143     return SDValue();
   3144 
   3145   // Attempt to recognise bitfield-insert operations.
   3146   SDValue Res = tryCombineToBFI(N, DCI, Subtarget);
   3147   if (Res.getNode())
   3148     return Res;
   3149 
   3150   // Attempt to combine an existing MaskedBFI operation into one with a larger
   3151   // mask.
   3152   Res = tryCombineToLargerBFI(N, DCI, Subtarget);
   3153   if (Res.getNode())
   3154     return Res;
   3155 
   3156   Res = tryCombineToEXTR(N, DCI);
   3157   if (Res.getNode())
   3158     return Res;
   3159 
   3160   if (!Subtarget->hasNEON())
   3161     return SDValue();
   3162 
   3163   // Attempt to use vector immediate-form BSL
   3164   // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant.
   3165 
   3166   SDValue N0 = N->getOperand(0);
   3167   if (N0.getOpcode() != ISD::AND)
   3168     return SDValue();
   3169 
   3170   SDValue N1 = N->getOperand(1);
   3171   if (N1.getOpcode() != ISD::AND)
   3172     return SDValue();
   3173 
   3174   if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
   3175     APInt SplatUndef;
   3176     unsigned SplatBitSize;
   3177     bool HasAnyUndefs;
   3178     BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1));
   3179     APInt SplatBits0;
   3180     if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize,
   3181                                       HasAnyUndefs) &&
   3182         !HasAnyUndefs) {
   3183       BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1));
   3184       APInt SplatBits1;
   3185       if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize,
   3186                                         HasAnyUndefs) &&
   3187           !HasAnyUndefs && SplatBits0 == ~SplatBits1) {
   3188         // Canonicalize the vector type to make instruction selection simpler.
   3189         EVT CanonicalVT = VT.is128BitVector() ? MVT::v16i8 : MVT::v8i8;
   3190         SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT,
   3191                                      N0->getOperand(1), N0->getOperand(0),
   3192                                      N1->getOperand(0));
   3193         return DAG.getNode(ISD::BITCAST, DL, VT, Result);
   3194       }
   3195     }
   3196   }
   3197 
   3198   return SDValue();
   3199 }
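         // An illustrative example of the BSL combine above: with a constant splat
         // mask M, (or (and B, M), (and C, ~M)) takes bits from B where M is set and
         // from C where it is clear, which is exactly what (NEON_BSL M, B, C)
         // computes once the operands are bitcast to the canonical v8i8/v16i8 type.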
   3200 
   3201 /// Target-specific dag combine xforms for ISD::SRA
   3202 static SDValue PerformSRACombine(SDNode *N,
   3203                                  TargetLowering::DAGCombinerInfo &DCI) {
   3204 
   3205   SelectionDAG &DAG = DCI.DAG;
   3206   SDLoc DL(N);
   3207   EVT VT = N->getValueType(0);
   3208 
   3209   // We're looking for an SRA/SHL pair which form an SBFX.
   3210 
   3211   if (VT != MVT::i32 && VT != MVT::i64)
   3212     return SDValue();
   3213 
   3214   if (!isa<ConstantSDNode>(N->getOperand(1)))
   3215     return SDValue();
   3216 
   3217   uint64_t ExtraSignBits = N->getConstantOperandVal(1);
   3218   SDValue Shift = N->getOperand(0);
   3219 
   3220   if (Shift.getOpcode() != ISD::SHL)
   3221     return SDValue();
   3222 
   3223   if (!isa<ConstantSDNode>(Shift->getOperand(1)))
   3224     return SDValue();
   3225 
   3226   uint64_t BitsOnLeft = Shift->getConstantOperandVal(1);
   3227   uint64_t Width = VT.getSizeInBits() - ExtraSignBits;
   3228   uint64_t LSB = VT.getSizeInBits() - Width - BitsOnLeft;
   3229 
   3230   if (LSB > VT.getSizeInBits() || Width > VT.getSizeInBits())
   3231     return SDValue();
   3232 
   3233   return DAG.getNode(AArch64ISD::SBFX, DL, VT, Shift.getOperand(0),
   3234                      DAG.getConstant(LSB, MVT::i64),
   3235                      DAG.getConstant(LSB + Width - 1, MVT::i64));
   3236 }
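         // An illustrative example of the combine above: on i32,
         // (sra (shl X, #24), #27) keeps a 5-bit signed field whose lowest bit is
         // bit 3 of X, so it becomes (SBFX X, #3, #7).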
   3237 
   3238 
   3239 SDValue
   3240 AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   3241                                          DAGCombinerInfo &DCI) const {
   3242   switch (N->getOpcode()) {
   3243   default: break;
   3244   case ISD::AND: return PerformANDCombine(N, DCI);
   3245   case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
   3246   case ISD::SRA: return PerformSRACombine(N, DCI);
   3247   }
   3248   return SDValue();
   3249 }
   3250 
   3251 bool
   3252 AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
   3253   VT = VT.getScalarType();
   3254 
   3255   if (!VT.isSimple())
   3256     return false;
   3257 
   3258   switch (VT.getSimpleVT().SimpleTy) {
   3259   case MVT::f16:
   3260   case MVT::f32:
   3261   case MVT::f64:
   3262     return true;
   3263   case MVT::f128:
   3264     return false;
   3265   default:
   3266     break;
   3267   }
   3268 
   3269   return false;
   3270 }
   3271 
   3272 // If this is a case we can't handle, return null and let the default
   3273 // expansion code take care of it.
   3274 SDValue
   3275 AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   3276                                          const AArch64Subtarget *ST) const {
   3277 
   3278   BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
   3279   SDLoc DL(Op);
   3280   EVT VT = Op.getValueType();
   3281 
   3282   APInt SplatBits, SplatUndef;
   3283   unsigned SplatBitSize;
   3284   bool HasAnyUndefs;
   3285 
   3286   // Note we favor lowering MOVI over MVNI.
   3287   // This has implications on the definition of patterns in TableGen to select
   3288   // BIC immediate instructions but not ORR immediate instructions.
   3289   // If this lowering order is changed, TableGen patterns for BIC immediate and
   3290   // ORR immediate instructions have to be updated.
   3291   if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
   3292     if (SplatBitSize <= 64) {
   3293       // First attempt to use vector immediate-form MOVI
   3294       EVT NeonMovVT;
   3295       unsigned Imm = 0;
   3296       unsigned OpCmode = 0;
   3297 
   3298       if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(),
   3299                             SplatBitSize, DAG, VT.is128BitVector(),
   3300                             Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) {
   3301         SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
   3302         SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
   3303 
   3304         if (ImmVal.getNode() && OpCmodeVal.getNode()) {
   3305           SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT,
   3306                                         ImmVal, OpCmodeVal);
   3307           return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
   3308         }
   3309       }
   3310 
   3311       // Then attempt to use vector immediate-form MVNI
   3312       uint64_t NegatedImm = (~SplatBits).getZExtValue();
   3313       if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize,
   3314                             DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT,
   3315                             Imm, OpCmode)) {
   3316         SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32);
   3317         SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32);
   3318         if (ImmVal.getNode() && OpCmodeVal.getNode()) {
   3319           SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT,
   3320                                         ImmVal, OpCmodeVal);
   3321           return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov);
   3322         }
   3323       }
   3324 
   3325       // Attempt to use vector immediate-form FMOV
   3326       if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) ||
   3327           (VT == MVT::v2f64 && SplatBitSize == 64)) {
   3328         APFloat RealVal(
   3329             SplatBitSize == 32 ? APFloat::IEEEsingle : APFloat::IEEEdouble,
   3330             SplatBits);
   3331         uint32_t ImmVal;
   3332         if (A64Imms::isFPImm(RealVal, ImmVal)) {
   3333           SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32);
   3334           return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val);
   3335         }
   3336       }
   3337     }
   3338   }
   3339   return SDValue();
   3340 }
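         // As an example of the lowering above, a v4i32 build_vector splatting
         // 0x0000ff00 is recognised by isNeonModifiedImm as "0x000000ff LSL 8"
         // (Imm = 0xff, OpCmode = 0x2) and is lowered to a single NEON_MOVIMM node.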
   3341 
   3342 AArch64TargetLowering::ConstraintType
   3343 AArch64TargetLowering::getConstraintType(const std::string &Constraint) const {
   3344   if (Constraint.size() == 1) {
   3345     switch (Constraint[0]) {
   3346     default: break;
   3347     case 'w': // An FP/SIMD vector register
   3348       return C_RegisterClass;
   3349     case 'I': // Constant that can be used with an ADD instruction
   3350     case 'J': // Constant that can be used with a SUB instruction
   3351     case 'K': // Constant that can be used with a 32-bit logical instruction
   3352     case 'L': // Constant that can be used with a 64-bit logical instruction
   3353     case 'M': // Constant that can be used as a 32-bit MOV immediate
   3354     case 'N': // Constant that can be used as a 64-bit MOV immediate
   3355     case 'Y': // Floating point constant zero
   3356     case 'Z': // Integer constant zero
   3357       return C_Other;
   3358     case 'Q': // A memory reference with base register and no offset
   3359       return C_Memory;
   3360     case 'S': // A symbolic address
   3361       return C_Other;
   3362     }
   3363   }
   3364 
   3365   // FIXME: Ump, Utf, Usa, Ush
   3366   // Ump: A memory address suitable for ldp/stp in SI, DI, SF and DF modes,
   3367   //      whatever they may be
   3368   // Utf: A memory address suitable for ldp/stp in TF mode, whatever it may be
   3369   // Usa: An absolute symbolic address
   3370   // Ush: The high part (bits 32:12) of a pc-relative symbolic address
   3371   assert(Constraint != "Ump" && Constraint != "Utf" && Constraint != "Usa"
   3372          && Constraint != "Ush" && "Unimplemented constraints");
   3373 
   3374   return TargetLowering::getConstraintType(Constraint);
   3375 }
   3376 
   3377 TargetLowering::ConstraintWeight
   3378 AArch64TargetLowering::getSingleConstraintMatchWeight(AsmOperandInfo &Info,
   3379                                                 const char *Constraint) const {
   3380 
   3381   llvm_unreachable("Constraint weight unimplemented");
   3382 }
   3383 
   3384 void
   3385 AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
   3386                                                     std::string &Constraint,
   3387                                                     std::vector<SDValue> &Ops,
   3388                                                     SelectionDAG &DAG) const {
   3389   SDValue Result(0, 0);
   3390 
   3391   // Only length 1 constraints are C_Other.
   3392   if (Constraint.size() != 1) return;
   3393 
    3394   // Only C_Other constraints get lowered like this. That means constants
    3395   // for us, so return early if there's no hope the constraint can be lowered.
   3396 
   3397   switch(Constraint[0]) {
   3398   default: break;
   3399   case 'I': case 'J': case 'K': case 'L':
   3400   case 'M': case 'N': case 'Z': {
   3401     ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
   3402     if (!C)
   3403       return;
   3404 
   3405     uint64_t CVal = C->getZExtValue();
   3406     uint32_t Bits;
   3407 
   3408     switch (Constraint[0]) {
   3409     default:
   3410       // FIXME: 'M' and 'N' are MOV pseudo-insts -- unsupported in assembly. 'J'
   3411       // is a peculiarly useless SUB constraint.
   3412       llvm_unreachable("Unimplemented C_Other constraint");
   3413     case 'I':
   3414       if (CVal <= 0xfff)
   3415         break;
   3416       return;
   3417     case 'K':
   3418       if (A64Imms::isLogicalImm(32, CVal, Bits))
   3419         break;
   3420       return;
   3421     case 'L':
   3422       if (A64Imms::isLogicalImm(64, CVal, Bits))
   3423         break;
   3424       return;
   3425     case 'Z':
   3426       if (CVal == 0)
   3427         break;
   3428       return;
   3429     }
   3430 
   3431     Result = DAG.getTargetConstant(CVal, Op.getValueType());
   3432     break;
   3433   }
   3434   case 'S': {
   3435     // An absolute symbolic address or label reference.
   3436     if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
   3437       Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
   3438                                           GA->getValueType(0));
   3439     } else if (const BlockAddressSDNode *BA
   3440                  = dyn_cast<BlockAddressSDNode>(Op)) {
   3441       Result = DAG.getTargetBlockAddress(BA->getBlockAddress(),
   3442                                          BA->getValueType(0));
   3443     } else if (const ExternalSymbolSDNode *ES
   3444                  = dyn_cast<ExternalSymbolSDNode>(Op)) {
   3445       Result = DAG.getTargetExternalSymbol(ES->getSymbol(),
   3446                                            ES->getValueType(0));
   3447     } else
   3448       return;
   3449     break;
   3450   }
   3451   case 'Y':
   3452     if (const ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
   3453       if (CFP->isExactlyValue(0.0)) {
   3454         Result = DAG.getTargetConstantFP(0.0, CFP->getValueType(0));
   3455         break;
   3456       }
   3457     }
   3458     return;
   3459   }
   3460 
   3461   if (Result.getNode()) {
   3462     Ops.push_back(Result);
   3463     return;
   3464   }
   3465 
   3466   // It's an unknown constraint for us. Let generic code have a go.
   3467   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
   3468 }
   3469 
   3470 std::pair<unsigned, const TargetRegisterClass*>
   3471 AArch64TargetLowering::getRegForInlineAsmConstraint(
   3472                                                   const std::string &Constraint,
   3473                                                   MVT VT) const {
   3474   if (Constraint.size() == 1) {
   3475     switch (Constraint[0]) {
   3476     case 'r':
   3477       if (VT.getSizeInBits() <= 32)
   3478         return std::make_pair(0U, &AArch64::GPR32RegClass);
   3479       else if (VT == MVT::i64)
   3480         return std::make_pair(0U, &AArch64::GPR64RegClass);
   3481       break;
   3482     case 'w':
   3483       if (VT == MVT::f16)
   3484         return std::make_pair(0U, &AArch64::FPR16RegClass);
   3485       else if (VT == MVT::f32)
   3486         return std::make_pair(0U, &AArch64::FPR32RegClass);
   3487       else if (VT == MVT::f64)
   3488         return std::make_pair(0U, &AArch64::FPR64RegClass);
   3489       else if (VT.getSizeInBits() == 64)
   3490         return std::make_pair(0U, &AArch64::VPR64RegClass);
   3491       else if (VT == MVT::f128)
   3492         return std::make_pair(0U, &AArch64::FPR128RegClass);
   3493       else if (VT.getSizeInBits() == 128)
   3494         return std::make_pair(0U, &AArch64::VPR128RegClass);
   3495       break;
   3496     }
   3497   }
   3498 
   3499   // Use the default implementation in TargetLowering to convert the register
   3500   // constraint into a member of a register class.
   3501   return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
   3502 }
   3503