      1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the AArch64 target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AArch64TargetMachine.h"
     15 #include "MCTargetDesc/AArch64AddressingModes.h"
     16 #include "llvm/ADT/APSInt.h"
     17 #include "llvm/CodeGen/SelectionDAGISel.h"
     18 #include "llvm/IR/Function.h" // To access function attributes.
     19 #include "llvm/IR/GlobalValue.h"
     20 #include "llvm/IR/Intrinsics.h"
     21 #include "llvm/Support/Debug.h"
     22 #include "llvm/Support/ErrorHandling.h"
     23 #include "llvm/Support/MathExtras.h"
     24 #include "llvm/Support/raw_ostream.h"
     25 
     26 using namespace llvm;
     27 
     28 #define DEBUG_TYPE "aarch64-isel"
     29 
     30 //===--------------------------------------------------------------------===//
     31 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
     32 /// instructions for SelectionDAG operations.
     33 ///
     34 namespace {
     35 
     36 class AArch64DAGToDAGISel : public SelectionDAGISel {
     37   AArch64TargetMachine &TM;
     38 
     39   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
     40   /// make the right decision when generating code for different targets.
     41   const AArch64Subtarget *Subtarget;
     42 
     43   bool ForCodeSize;
     44 
     45 public:
     46   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
     47                                CodeGenOpt::Level OptLevel)
     48       : SelectionDAGISel(tm, OptLevel), TM(tm), Subtarget(nullptr),
     49         ForCodeSize(false) {}
     50 
     51   const char *getPassName() const override {
     52     return "AArch64 Instruction Selection";
     53   }
     54 
     55   bool runOnMachineFunction(MachineFunction &MF) override {
     56     AttributeSet FnAttrs = MF.getFunction()->getAttributes();
     57     ForCodeSize =
     58         FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
     59                              Attribute::OptimizeForSize) ||
     60         FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
     61     Subtarget = &TM.getSubtarget<AArch64Subtarget>();
     62     return SelectionDAGISel::runOnMachineFunction(MF);
     63   }
     64 
     65   SDNode *Select(SDNode *Node) override;
     66 
     67   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
     68   /// inline asm expressions.
     69   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
     70                                     char ConstraintCode,
     71                                     std::vector<SDValue> &OutOps) override;
     72 
     73   SDNode *SelectMLAV64LaneV128(SDNode *N);
     74   SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
     75   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
     76   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
     77   bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
     78   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
     79     return SelectShiftedRegister(N, false, Reg, Shift);
     80   }
     81   bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
     82     return SelectShiftedRegister(N, true, Reg, Shift);
     83   }
     84   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
     85     return SelectAddrModeIndexed(N, 1, Base, OffImm);
     86   }
     87   bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
     88     return SelectAddrModeIndexed(N, 2, Base, OffImm);
     89   }
     90   bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
     91     return SelectAddrModeIndexed(N, 4, Base, OffImm);
     92   }
     93   bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
     94     return SelectAddrModeIndexed(N, 8, Base, OffImm);
     95   }
     96   bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
     97     return SelectAddrModeIndexed(N, 16, Base, OffImm);
     98   }
     99   bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    100     return SelectAddrModeUnscaled(N, 1, Base, OffImm);
    101   }
    102   bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    103     return SelectAddrModeUnscaled(N, 2, Base, OffImm);
    104   }
    105   bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    106     return SelectAddrModeUnscaled(N, 4, Base, OffImm);
    107   }
    108   bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    109     return SelectAddrModeUnscaled(N, 8, Base, OffImm);
    110   }
    111   bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    112     return SelectAddrModeUnscaled(N, 16, Base, OffImm);
    113   }
    114 
    115   template<int Width>
    116   bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
    117                          SDValue &SignExtend, SDValue &DoShift) {
    118     return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
    119   }
    120 
    121   template<int Width>
    122   bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
    123                          SDValue &SignExtend, SDValue &DoShift) {
    124     return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
    125   }
    126 
    127 
    128   /// Form sequences of consecutive 64/128-bit registers for use in NEON
    129   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
    130   /// between 1 and 4 elements. If it contains a single element, that element
    131   /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
    132   SDValue createDTuple(ArrayRef<SDValue> Vecs);
    133   SDValue createQTuple(ArrayRef<SDValue> Vecs);
    134 
    135   /// Generic helper for the createDTuple/createQTuple
    136   /// functions. Those should almost always be called instead.
    137   SDValue createTuple(ArrayRef<SDValue> Vecs, unsigned RegClassIDs[],
    138                       unsigned SubRegs[]);
    139 
    140   SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
    141 
    142   SDNode *SelectIndexedLoad(SDNode *N, bool &Done);
    143 
    144   SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
    145                      unsigned SubRegIdx);
    146   SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
    147                          unsigned SubRegIdx);
    148   SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    149   SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    150 
    151   SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
    152   SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
    153   SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    154   SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    155 
    156   SDNode *SelectBitfieldExtractOp(SDNode *N);
    157   SDNode *SelectBitfieldInsertOp(SDNode *N);
    158 
    159   SDNode *SelectLIBM(SDNode *N);
    160 
    161 // Include the pieces autogenerated from the target description.
    162 #include "AArch64GenDAGISel.inc"
    163 
    164 private:
    165   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
    166                              SDValue &Shift);
    167   bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
    168                              SDValue &OffImm);
    169   bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
    170                               SDValue &OffImm);
    171   bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
    172                          SDValue &Offset, SDValue &SignExtend,
    173                          SDValue &DoShift);
    174   bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
    175                          SDValue &Offset, SDValue &SignExtend,
    176                          SDValue &DoShift);
    177   bool isWorthFolding(SDValue V) const;
    178   bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
    179                          SDValue &Offset, SDValue &SignExtend);
    180 
    181   template<unsigned RegWidth>
    182   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    183     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
    184   }
    185 
    186   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
    187 };
    188 } // end anonymous namespace
    189 
    190 /// isIntImmediate - This method tests to see if the node is a constant
    191 /// operand. If so, Imm will receive the zero-extended constant value.
    192 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
    193   if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    194     Imm = C->getZExtValue();
    195     return true;
    196   }
    197   return false;
    198 }
    199 
    200 // isIntImmediate - This method tests to see if the operand is a constant.
    201 // If so, Imm will receive the value.
    202 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
    203   return isIntImmediate(N.getNode(), Imm);
    204 }
    205 
    206 // isOpcWithIntImmediate - This method tests to see if the node is a specific
    207 // opcode and that it has an immediate integer right operand.
    208 // If so, Imm will receive the value.
    209 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
    210                                   uint64_t &Imm) {
    211   return N->getOpcode() == Opc &&
    212          isIntImmediate(N->getOperand(1).getNode(), Imm);
    213 }
    214 
    215 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    216     const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
    217   assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
    218   // Require the address to be in a register.  That is safe for all AArch64
    219   // variants and it is hard to do anything much smarter without knowing
    220   // how the operand is used.
    221   OutOps.push_back(Op);
    222   return false;
    223 }
    224 
    225 /// SelectArithImmed - Select an immediate value that can be represented as
    226 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
    227 /// Val set to the 12-bit value and Shift set to the shifter operand.
    228 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
    229                                            SDValue &Shift) {
    230   // This function is called from the addsub_shifted_imm ComplexPattern,
    231   // which lists [imm] as the list of opcodes it's interested in; however,
    232   // we still need to check whether the operand is actually an immediate
    233   // here because the ComplexPattern opcode list is only used in
    234   // root-level opcode matching.
    235   if (!isa<ConstantSDNode>(N.getNode()))
    236     return false;
    237 
    238   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
    239   unsigned ShiftAmt;
    240 
    241   if (Immed >> 12 == 0) {
    242     ShiftAmt = 0;
    243   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    244     ShiftAmt = 12;
    245     Immed = Immed >> 12;
    246   } else
    247     return false;
    248 
    249   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
    250   Val = CurDAG->getTargetConstant(Immed, MVT::i32);
    251   Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
    252   return true;
    253 }
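
// Illustrative example (editorial note, not in the original source): the
// constant 0x456000 has its low 12 bits clear and fits in 24 bits, so the
// checks above yield Immed = 0x456 and ShiftAmt = 12, selecting the operand
// form "#0x456, lsl #12" (as in "add x0, x1, #0x456, lsl #12"). A constant
// such as 0x1234 fails both checks and is rejected.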
    254 
    255 /// SelectNegArithImmed - As above, but negates the value before trying to
    256 /// select it.
    257 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
    258                                               SDValue &Shift) {
    259   // This function is called from the addsub_shifted_imm ComplexPattern,
    260   // which lists [imm] as the list of opcodes it's interested in; however,
    261   // we still need to check whether the operand is actually an immediate
    262   // here because the ComplexPattern opcode list is only used in
    263   // root-level opcode matching.
    264   if (!isa<ConstantSDNode>(N.getNode()))
    265     return false;
    266 
    267   // The immediate operand must be a 24-bit zero-extended immediate.
    268   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
    269 
    270   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
    271   // have the opposite effect on the C flag, so this pattern mustn't match under
    272   // those circumstances.
    273   if (Immed == 0)
    274     return false;
    275 
    276   if (N.getValueType() == MVT::i32)
    277     Immed = ~((uint32_t)Immed) + 1;
    278   else
    279     Immed = ~Immed + 1ULL;
    280   if (Immed & 0xFFFFFFFFFF000000ULL)
    281     return false;
    282 
    283   Immed &= 0xFFFFFFULL;
    284   return SelectArithImmed(CurDAG->getConstant(Immed, MVT::i32), Val, Shift);
    285 }
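
// Illustrative example (editorial note, not in the original source): for an
// i32 operand of -5, the stored constant is 0xFFFFFFFB; the 32-bit negation
// above gives 5, which passes the 24-bit check, so "cmp w0, #-5" can be
// selected as "cmn w0, #5". The Immed == 0 case is rejected because negating
// it would invert the meaning of the C flag, as the comment above explains.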
    286 
    287 /// getShiftTypeForNode - Translate a shift node to the corresponding
    288 /// ShiftType value.
    289 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
    290   switch (N.getOpcode()) {
    291   default:
    292     return AArch64_AM::InvalidShiftExtend;
    293   case ISD::SHL:
    294     return AArch64_AM::LSL;
    295   case ISD::SRL:
    296     return AArch64_AM::LSR;
    297   case ISD::SRA:
    298     return AArch64_AM::ASR;
    299   case ISD::ROTR:
    300     return AArch64_AM::ROR;
    301   }
    302 }
    303 
    304 /// \brief Determine whether it is worth folding V into an extended register.
    305 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
    306   // It hurts if a value is used at least twice, unless we are optimizing
    307   // for code size.
    308   if (ForCodeSize || V.hasOneUse())
    309     return true;
    310   return false;
    311 }
    312 
    313 /// SelectShiftedRegister - Select a "shifted register" operand.  If the value
    314 /// is not shifted, set the Shift operand to default of "LSL 0".  The logical
    315 /// instructions allow the shifted register to be rotated, but the arithmetic
    316 /// instructions do not.  The AllowROR parameter specifies whether ROR is
    317 /// supported.
    318 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
    319                                                 SDValue &Reg, SDValue &Shift) {
    320   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
    321   if (ShType == AArch64_AM::InvalidShiftExtend)
    322     return false;
    323   if (!AllowROR && ShType == AArch64_AM::ROR)
    324     return false;
    325 
    326   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    327     unsigned BitSize = N.getValueType().getSizeInBits();
    328     unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    329     unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
    330 
    331     Reg = N.getOperand(0);
    332     Shift = CurDAG->getTargetConstant(ShVal, MVT::i32);
    333     return isWorthFolding(N);
    334   }
    335 
    336   return false;
    337 }
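
// Illustrative example (editorial note, not in the original source): for
// N = (shl i64 %x, 4) feeding a logical instruction, Reg becomes %x and
// Shift encodes "LSL #4"; the shift amount is masked with BitSize - 1 so it
// always fits the instruction's immediate field. An ROR only folds when
// AllowROR is set, i.e. for the logical (not arithmetic) instructions.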
    338 
    339 /// getExtendTypeForNode - Translate an extend node to the corresponding
    340 /// ExtendType value.
    341 static AArch64_AM::ShiftExtendType
    342 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
    343   if (N.getOpcode() == ISD::SIGN_EXTEND ||
    344       N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    345     EVT SrcVT;
    346     if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
    347       SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    348     else
    349       SrcVT = N.getOperand(0).getValueType();
    350 
    351     if (!IsLoadStore && SrcVT == MVT::i8)
    352       return AArch64_AM::SXTB;
    353     else if (!IsLoadStore && SrcVT == MVT::i16)
    354       return AArch64_AM::SXTH;
    355     else if (SrcVT == MVT::i32)
    356       return AArch64_AM::SXTW;
    357     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
    358 
    359     return AArch64_AM::InvalidShiftExtend;
    360   } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
    361              N.getOpcode() == ISD::ANY_EXTEND) {
    362     EVT SrcVT = N.getOperand(0).getValueType();
    363     if (!IsLoadStore && SrcVT == MVT::i8)
    364       return AArch64_AM::UXTB;
    365     else if (!IsLoadStore && SrcVT == MVT::i16)
    366       return AArch64_AM::UXTH;
    367     else if (SrcVT == MVT::i32)
    368       return AArch64_AM::UXTW;
    369     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
    370 
    371     return AArch64_AM::InvalidShiftExtend;
    372   } else if (N.getOpcode() == ISD::AND) {
    373     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    374     if (!CSD)
    375       return AArch64_AM::InvalidShiftExtend;
    376     uint64_t AndMask = CSD->getZExtValue();
    377 
    378     switch (AndMask) {
    379     default:
    380       return AArch64_AM::InvalidShiftExtend;
    381     case 0xFF:
    382       return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    383     case 0xFFFF:
    384       return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    385     case 0xFFFFFFFF:
    386       return AArch64_AM::UXTW;
    387     }
    388   }
    389 
    390   return AArch64_AM::InvalidShiftExtend;
    391 }
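
// Illustrative examples (editorial note, not in the original source):
// (and %x, 0xFF) maps to UXTB, (sext_inreg %x, i16) to SXTH, and
// (zero_extend i32 %w) to UXTW. With IsLoadStore set, only the 32-bit forms
// survive, since the load/store addressing modes cannot extend from i8/i16.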
    392 
    393 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
    394 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
    395   if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
    396       DL->getOpcode() != AArch64ISD::DUPLANE32)
    397     return false;
    398 
    399   SDValue SV = DL->getOperand(0);
    400   if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    401     return false;
    402 
    403   SDValue EV = SV.getOperand(1);
    404   if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    405     return false;
    406 
    407   ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
    408   ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
    409   LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
    410   LaneOp = EV.getOperand(0);
    411 
    412   return true;
    413 }
    414 
    415 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand
    416 // is a high lane extract.
    417 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
    418                              SDValue &LaneOp, int &LaneIdx) {
    419 
    420   if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    421     std::swap(Op0, Op1);
    422     if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
    423       return false;
    424   }
    425   StdOp = Op1;
    426   return true;
    427 }
    428 
    429 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
    430 /// is a lane in the upper half of a 128-bit vector.  Recognize and select this
    431 /// so that we don't emit unnecessary lane extracts.
    432 SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
    433   SDValue Op0 = N->getOperand(0);
    434   SDValue Op1 = N->getOperand(1);
    435   SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
    436   SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
    437   int LaneIdx = -1; // Will hold the lane index.
    438 
    439   if (Op1.getOpcode() != ISD::MUL ||
    440       !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
    441                         LaneIdx)) {
    442     std::swap(Op0, Op1);
    443     if (Op1.getOpcode() != ISD::MUL ||
    444         !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
    445                           LaneIdx))
    446       return nullptr;
    447   }
    448 
    449   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
    450 
    451   SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
    452 
    453   unsigned MLAOpc = ~0U;
    454 
    455   switch (N->getSimpleValueType(0).SimpleTy) {
    456   default:
    457     llvm_unreachable("Unrecognized MLA.");
    458   case MVT::v4i16:
    459     MLAOpc = AArch64::MLAv4i16_indexed;
    460     break;
    461   case MVT::v8i16:
    462     MLAOpc = AArch64::MLAv8i16_indexed;
    463     break;
    464   case MVT::v2i32:
    465     MLAOpc = AArch64::MLAv2i32_indexed;
    466     break;
    467   case MVT::v4i32:
    468     MLAOpc = AArch64::MLAv4i32_indexed;
    469     break;
    470   }
    471 
    472   return CurDAG->getMachineNode(MLAOpc, SDLoc(N), N->getValueType(0), Ops);
    473 }
    474 
    475 SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
    476   SDValue SMULLOp0;
    477   SDValue SMULLOp1;
    478   int LaneIdx;
    479 
    480   if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
    481                         LaneIdx))
    482     return nullptr;
    483 
    484   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, MVT::i64);
    485 
    486   SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
    487 
    488   unsigned SMULLOpc = ~0U;
    489 
    490   if (IntNo == Intrinsic::aarch64_neon_smull) {
    491     switch (N->getSimpleValueType(0).SimpleTy) {
    492     default:
    493       llvm_unreachable("Unrecognized SMULL.");
    494     case MVT::v4i32:
    495       SMULLOpc = AArch64::SMULLv4i16_indexed;
    496       break;
    497     case MVT::v2i64:
    498       SMULLOpc = AArch64::SMULLv2i32_indexed;
    499       break;
    500     }
    501   } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    502     switch (N->getSimpleValueType(0).SimpleTy) {
    503     default:
    504       llvm_unreachable("Unrecognized UMULL.");
    505     case MVT::v4i32:
    506       SMULLOpc = AArch64::UMULLv4i16_indexed;
    507       break;
    508     case MVT::v2i64:
    509       SMULLOpc = AArch64::UMULLv2i32_indexed;
    510       break;
    511     }
    512   } else
    513     llvm_unreachable("Unrecognized intrinsic.");
    514 
    515   return CurDAG->getMachineNode(SMULLOpc, SDLoc(N), N->getValueType(0), Ops);
    516 }
    517 
    518 /// Instructions that accept extend modifiers like UXTW expect the register
    519 /// being extended to be a GPR32, but the incoming DAG might be acting on a
    520 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
    521 /// this is the case.
    522 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
    523   if (N.getValueType() == MVT::i32)
    524     return N;
    525 
    526   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
    527   MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
    528                                                SDLoc(N), MVT::i32, N, SubReg);
    529   return SDValue(Node, 0);
    530 }
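
// Illustrative example (editorial note, not in the original source): when an
// SXTB is folded from (sext_inreg i64 %x, i8), the value being extended is a
// GPR64, so narrowIfNeeded emits an EXTRACT_SUBREG of sub_32 to produce the
// GPR32 that the extended-register instruction forms expect.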
    531 
    532 
    533 /// SelectArithExtendedRegister - Select an "extended register" operand.  This
    534 /// operand folds in an extend followed by an optional left shift.
    535 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
    536                                                       SDValue &Shift) {
    537   unsigned ShiftVal = 0;
    538   AArch64_AM::ShiftExtendType Ext;
    539 
    540   if (N.getOpcode() == ISD::SHL) {
    541     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    542     if (!CSD)
    543       return false;
    544     ShiftVal = CSD->getZExtValue();
    545     if (ShiftVal > 4)
    546       return false;
    547 
    548     Ext = getExtendTypeForNode(N.getOperand(0));
    549     if (Ext == AArch64_AM::InvalidShiftExtend)
    550       return false;
    551 
    552     Reg = N.getOperand(0).getOperand(0);
    553   } else {
    554     Ext = getExtendTypeForNode(N);
    555     if (Ext == AArch64_AM::InvalidShiftExtend)
    556       return false;
    557 
    558     Reg = N.getOperand(0);
    559   }
    560 
    561   // AArch64 mandates that the RHS of the operation must use the smallest
    562   // register class that could contain the size being extended from.  Thus,
    563   // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
    564   // there might not be an actual 32-bit value in the program.  We can
    565   // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
    566   assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
    567   Reg = narrowIfNeeded(CurDAG, Reg);
    568   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), MVT::i32);
    569   return isWorthFolding(N);
    570 }
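
// Illustrative example (editorial note, not in the original source): for
// N = (shl (sext i32 %w to i64), 2), Ext is SXTW and ShiftVal is 2, so this
// selects Reg = %w and Shift = "sxtw #2" -- the operand behind
// "add x0, x1, w2, sxtw #2". Shift amounts above 4 are rejected because the
// extended-register form only allows a left shift of 0-4.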
    571 
    572 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
    573 /// immediate" address.  The "Size" argument is the size in bytes of the memory
    574 /// reference, which determines the scale.
    575 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
    576                                               SDValue &Base, SDValue &OffImm) {
    577   const TargetLowering *TLI = getTargetLowering();
    578   if (N.getOpcode() == ISD::FrameIndex) {
    579     int FI = cast<FrameIndexSDNode>(N)->getIndex();
    580     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    581     OffImm = CurDAG->getTargetConstant(0, MVT::i64);
    582     return true;
    583   }
    584 
    585   if (N.getOpcode() == AArch64ISD::ADDlow) {
    586     GlobalAddressSDNode *GAN =
    587         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    588     Base = N.getOperand(0);
    589     OffImm = N.getOperand(1);
    590     if (!GAN)
    591       return true;
    592 
    593     const GlobalValue *GV = GAN->getGlobal();
    594     unsigned Alignment = GV->getAlignment();
    595     const DataLayout *DL = TLI->getDataLayout();
    596     Type *Ty = GV->getType()->getElementType();
    597     if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin())
    598       Alignment = DL->getABITypeAlignment(Ty);
    599 
    600     if (Alignment >= Size)
    601       return true;
    602   }
    603 
    604   if (CurDAG->isBaseWithConstantOffset(N)) {
    605     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    606       int64_t RHSC = (int64_t)RHS->getZExtValue();
    607       unsigned Scale = Log2_32(Size);
    608       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
    609         Base = N.getOperand(0);
    610         if (Base.getOpcode() == ISD::FrameIndex) {
    611           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    612           Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    613         }
    614         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, MVT::i64);
    615         return true;
    616       }
    617     }
    618   }
    619 
    620   // Before falling back to our general case, check if the unscaled
    621   // instructions can handle this. If so, that's preferable.
    622   if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    623     return false;
    624 
    625   // Base only. The address will be materialized into a register before
    626   // the memory is accessed.
    627   //    add x0, Xbase, #offset
    628   //    ldr x0, [x0]
    629   Base = N;
    630   OffImm = CurDAG->getTargetConstant(0, MVT::i64);
    631   return true;
    632 }
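
// Illustrative example (editorial note, not in the original source): for an
// 8-byte access to (add %base, 32), Scale is 3 and 32 is 8-aligned, so
// Base = %base and OffImm = 4; the instruction encodes the byte offset
// divided by the access size, as in "ldr x0, [x1, #32]".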
    633 
    634 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
    635 /// immediate" address.  This should only match when there is an offset that
    636 /// is not valid for a scaled immediate addressing mode.  The "Size" argument
    637 /// is the size in bytes of the memory reference, which is needed here to know
    638 /// what is valid for a scaled immediate.
    639 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
    640                                                  SDValue &Base,
    641                                                  SDValue &OffImm) {
    642   if (!CurDAG->isBaseWithConstantOffset(N))
    643     return false;
    644   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    645     int64_t RHSC = RHS->getSExtValue();
    646     // If the offset is valid as a scaled immediate, don't match here.
    647     if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
    648         RHSC < (0x1000 << Log2_32(Size)))
    649       return false;
    650     if (RHSC >= -256 && RHSC < 256) {
    651       Base = N.getOperand(0);
    652       if (Base.getOpcode() == ISD::FrameIndex) {
    653         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    654         const TargetLowering *TLI = getTargetLowering();
    655         Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
    656       }
    657       OffImm = CurDAG->getTargetConstant(RHSC, MVT::i64);
    658       return true;
    659     }
    660   }
    661   return false;
    662 }
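
// Illustrative example (editorial note, not in the original source): a byte
// offset of -16 on an 8-byte access fails the scaled form above (negative
// offsets are not allowed there) but lies within [-256, 256), so it is
// matched here and selects the unscaled "ldur x0, [x1, #-16]" form.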
    663 
    664 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
    665   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
    666   SDValue ImpDef = SDValue(
    667       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SDLoc(N), MVT::i64),
    668       0);
    669   MachineSDNode *Node = CurDAG->getMachineNode(
    670       TargetOpcode::INSERT_SUBREG, SDLoc(N), MVT::i64, ImpDef, N, SubReg);
    671   return SDValue(Node, 0);
    672 }
    673 
    674 /// \brief Check if the given SHL node (\p N) can be used to form an
    675 /// extended register for an addressing mode.
    676 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
    677                                             bool WantExtend, SDValue &Offset,
    678                                             SDValue &SignExtend) {
    679   assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
    680   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    681   if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    682     return false;
    683 
    684   if (WantExtend) {
    685     AArch64_AM::ShiftExtendType Ext =
    686         getExtendTypeForNode(N.getOperand(0), true);
    687     if (Ext == AArch64_AM::InvalidShiftExtend)
    688       return false;
    689 
    690     Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    691     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    692   } else {
    693     Offset = N.getOperand(0);
    694     SignExtend = CurDAG->getTargetConstant(0, MVT::i32);
    695   }
    696 
    697   unsigned LegalShiftVal = Log2_32(Size);
    698   unsigned ShiftVal = CSD->getZExtValue();
    699 
    700   if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    701     return false;
    702 
    703   if (isWorthFolding(N))
    704     return true;
    705 
    706   return false;
    707 }
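
// Illustrative example (editorial note, not in the original source): for a
// 4-byte access with WantExtend set, N = (shl (zero_extend i32 %w to i64), 2)
// yields Offset = %w and SignExtend = 0, i.e. the "[x0, w1, uxtw #2]"
// addressing form; any shift amount other than 0 or log2(Size) is rejected.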
    708 
    709 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
    710                                             SDValue &Base, SDValue &Offset,
    711                                             SDValue &SignExtend,
    712                                             SDValue &DoShift) {
    713   if (N.getOpcode() != ISD::ADD)
    714     return false;
    715   SDValue LHS = N.getOperand(0);
    716   SDValue RHS = N.getOperand(1);
    717 
    718   // We don't want to match immediate adds here, because they are better lowered
    719   // to the register-immediate addressing modes.
    720   if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    721     return false;
    722 
    723   // Check if this particular node is reused in any non-memory related
    724   // operation.  If yes, do not try to fold this node into the address
    725   // computation, since the computation will be kept.
    726   const SDNode *Node = N.getNode();
    727   for (SDNode *UI : Node->uses()) {
    728     if (!isa<MemSDNode>(*UI))
    729       return false;
    730   }
    731 
    732   // Remember if it is worth folding N when it produces an extended register.
    733   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
    734 
    735   // Try to match a shifted extend on the RHS.
    736   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
    737       SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    738     Base = LHS;
    739     DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    740     return true;
    741   }
    742 
    743   // Try to match a shifted extend on the LHS.
    744   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
    745       SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    746     Base = RHS;
    747     DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    748     return true;
    749   }
    750 
    751   // There was no shift, whatever else we find.
    752   DoShift = CurDAG->getTargetConstant(false, MVT::i32);
    753 
    754   AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
    755   // Try to match an unshifted extend on the LHS.
    756   if (IsExtendedRegisterWorthFolding &&
    757       (Ext = getExtendTypeForNode(LHS, true)) !=
    758           AArch64_AM::InvalidShiftExtend) {
    759     Base = RHS;
    760     Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    761     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    762     if (isWorthFolding(LHS))
    763       return true;
    764   }
    765 
    766   // Try to match an unshifted extend on the RHS.
    767   if (IsExtendedRegisterWorthFolding &&
    768       (Ext = getExtendTypeForNode(RHS, true)) !=
    769           AArch64_AM::InvalidShiftExtend) {
    770     Base = LHS;
    771     Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    772     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, MVT::i32);
    773     if (isWorthFolding(RHS))
    774       return true;
    775   }
    776 
    777   return false;
    778 }
    779 
    780 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
    781                                             SDValue &Base, SDValue &Offset,
    782                                             SDValue &SignExtend,
    783                                             SDValue &DoShift) {
    784   if (N.getOpcode() != ISD::ADD)
    785     return false;
    786   SDValue LHS = N.getOperand(0);
    787   SDValue RHS = N.getOperand(1);
    788 
    789   // We don't want to match immediate adds here, because they are better lowered
    790   // to the register-immediate addressing modes.
    791   if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    792     return false;
    793 
    794   // Check if this particular node is reused in any non-memory related
    795   // operation.  If yes, do not try to fold this node into the address
    796   // computation, since the computation will be kept.
    797   const SDNode *Node = N.getNode();
    798   for (SDNode *UI : Node->uses()) {
    799     if (!isa<MemSDNode>(*UI))
    800       return false;
    801   }
    802 
    803   // Remember if it is worth folding N when it produces an extended register.
    804   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
    805 
    806   // Try to match a shifted extend on the RHS.
    807   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
    808       SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    809     Base = LHS;
    810     DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    811     return true;
    812   }
    813 
    814   // Try to match a shifted extend on the LHS.
    815   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
    816       SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    817     Base = RHS;
    818     DoShift = CurDAG->getTargetConstant(true, MVT::i32);
    819     return true;
    820   }
    821 
    822   // Match any non-shifted, non-extend, non-immediate add expression.
    823   Base = LHS;
    824   Offset = RHS;
    825   SignExtend = CurDAG->getTargetConstant(false, MVT::i32);
    826   DoShift = CurDAG->getTargetConstant(false, MVT::i32);
    827   // Reg1 + Reg2 is free: no check needed.
    828   return true;
    829 }
    830 
    831 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
    832   static unsigned RegClassIDs[] = {
    833       AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
    834   static unsigned SubRegs[] = { AArch64::dsub0, AArch64::dsub1,
    835                                 AArch64::dsub2, AArch64::dsub3 };
    836 
    837   return createTuple(Regs, RegClassIDs, SubRegs);
    838 }
    839 
    840 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
    841   static unsigned RegClassIDs[] = {
    842       AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
    843   static unsigned SubRegs[] = { AArch64::qsub0, AArch64::qsub1,
    844                                 AArch64::qsub2, AArch64::qsub3 };
    845 
    846   return createTuple(Regs, RegClassIDs, SubRegs);
    847 }
    848 
    849 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
    850                                          unsigned RegClassIDs[],
    851                                          unsigned SubRegs[]) {
    852   // There's no special register-class for a vector-list of 1 element: it's just
    853   // a vector.
    854   if (Regs.size() == 1)
    855     return Regs[0];
    856 
    857   assert(Regs.size() >= 2 && Regs.size() <= 4);
    858 
    859   SDLoc DL(Regs[0].getNode());
    860 
    861   SmallVector<SDValue, 4> Ops;
    862 
    863   // First operand of REG_SEQUENCE is the desired RegClass.
    864   Ops.push_back(
    865       CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32));
    866 
    867   // Then we get pairs of source & subregister-position for the components.
    868   for (unsigned i = 0; i < Regs.size(); ++i) {
    869     Ops.push_back(Regs[i]);
    870     Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32));
    871   }
    872 
    873   SDNode *N =
    874       CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
    875   return SDValue(N, 0);
    876 }
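
// Illustrative example (editorial note, not in the original source): for two
// Q registers {%q0, %q1}, createQTuple emits
//   REG_SEQUENCE QQRegClassID, %q0, qsub0, %q1, qsub1
// giving a single Untyped value that the ld2/st2 selection code below can
// consume as a register pair.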
    877 
    878 SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
    879                                          unsigned Opc, bool isExt) {
    880   SDLoc dl(N);
    881   EVT VT = N->getValueType(0);
    882 
    883   unsigned ExtOff = isExt;
    884 
    885   // Form a REG_SEQUENCE to force register allocation.
    886   unsigned Vec0Off = ExtOff + 1;
    887   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
    888                                N->op_begin() + Vec0Off + NumVecs);
    889   SDValue RegSeq = createQTuple(Regs);
    890 
    891   SmallVector<SDValue, 6> Ops;
    892   if (isExt)
    893     Ops.push_back(N->getOperand(1));
    894   Ops.push_back(RegSeq);
    895   Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
    896   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
    897 }
    898 
    899 SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
    900   LoadSDNode *LD = cast<LoadSDNode>(N);
    901   if (LD->isUnindexed())
    902     return nullptr;
    903   EVT VT = LD->getMemoryVT();
    904   EVT DstVT = N->getValueType(0);
    905   ISD::MemIndexedMode AM = LD->getAddressingMode();
    906   bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
    907 
    908   // We're not doing validity checking here. That was done when checking
    909   // if we should mark the load as indexed or not. We're just selecting
    910   // the right instruction.
    911   unsigned Opcode = 0;
    912 
    913   ISD::LoadExtType ExtType = LD->getExtensionType();
    914   bool InsertTo64 = false;
    915   if (VT == MVT::i64)
    916     Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
    917   else if (VT == MVT::i32) {
    918     if (ExtType == ISD::NON_EXTLOAD)
    919       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    920     else if (ExtType == ISD::SEXTLOAD)
    921       Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    922     else {
    923       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    924       InsertTo64 = true;
    925       // The result of the load is only i32. It's the subreg_to_reg that makes
    926       // it into an i64.
    927       DstVT = MVT::i32;
    928     }
    929   } else if (VT == MVT::i16) {
    930     if (ExtType == ISD::SEXTLOAD) {
    931       if (DstVT == MVT::i64)
    932         Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
    933       else
    934         Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    935     } else {
    936       Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
    937       InsertTo64 = DstVT == MVT::i64;
    938       // The result of the load is only i32. It's the subreg_to_reg that makes
    939       // it into an i64.
    940       DstVT = MVT::i32;
    941     }
    942   } else if (VT == MVT::i8) {
    943     if (ExtType == ISD::SEXTLOAD) {
    944       if (DstVT == MVT::i64)
    945         Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
    946       else
    947         Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    948     } else {
    949       Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
    950       InsertTo64 = DstVT == MVT::i64;
    951       // The result of the load is only i32. It's the subreg_to_reg that makes
    952       // it into an i64.
    953       DstVT = MVT::i32;
    954     }
    955   } else if (VT == MVT::f32) {
    956     Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
    957   } else if (VT == MVT::f64 || VT.is64BitVector()) {
    958     Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
    959   } else if (VT.is128BitVector()) {
    960     Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
    961   } else
    962     return nullptr;
    963   SDValue Chain = LD->getChain();
    964   SDValue Base = LD->getBasePtr();
    965   ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
    966   int OffsetVal = (int)OffsetOp->getZExtValue();
    967   SDValue Offset = CurDAG->getTargetConstant(OffsetVal, MVT::i64);
    968   SDValue Ops[] = { Base, Offset, Chain };
    969   SDNode *Res = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i64, DstVT,
    970                                        MVT::Other, Ops);
    971   // Either way, we're replacing the node, so tell the caller that.
    972   Done = true;
    973   SDValue LoadedVal = SDValue(Res, 1);
    974   if (InsertTo64) {
    975     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
    976     LoadedVal =
    977         SDValue(CurDAG->getMachineNode(
    978                     AArch64::SUBREG_TO_REG, SDLoc(N), MVT::i64,
    979                     CurDAG->getTargetConstant(0, MVT::i64), LoadedVal, SubReg),
    980                 0);
    981   }
    982 
    983   ReplaceUses(SDValue(N, 0), LoadedVal);
    984   ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
    985   ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
    986 
    987   return nullptr;
    988 }
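
// Illustrative example (editorial note, not in the original source): a
// post-indexed i64 load "val = *p; p += 8" selects to LDRXpost, whose
// results are (new base, loaded value, chain); the ReplaceUses calls above
// rewire the original load's value, write-back and chain outputs to them.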
    989 
    990 SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
    991                                         unsigned Opc, unsigned SubRegIdx) {
    992   SDLoc dl(N);
    993   EVT VT = N->getValueType(0);
    994   SDValue Chain = N->getOperand(0);
    995 
    996   SmallVector<SDValue, 6> Ops;
    997   Ops.push_back(N->getOperand(2)); // Mem operand
    998   Ops.push_back(Chain);
    999 
   1000   std::vector<EVT> ResTys;
   1001   ResTys.push_back(MVT::Untyped);
   1002   ResTys.push_back(MVT::Other);
   1003 
   1004   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1005   SDValue SuperReg = SDValue(Ld, 0);
   1006   for (unsigned i = 0; i < NumVecs; ++i)
   1007     ReplaceUses(SDValue(N, i),
   1008         CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
   1009 
   1010   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
   1011   return nullptr;
   1012 }
   1013 
   1014 SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
   1015                                             unsigned Opc, unsigned SubRegIdx) {
   1016   SDLoc dl(N);
   1017   EVT VT = N->getValueType(0);
   1018   SDValue Chain = N->getOperand(0);
   1019 
   1020   SmallVector<SDValue, 6> Ops;
   1021   Ops.push_back(N->getOperand(1)); // Mem operand
   1022   Ops.push_back(N->getOperand(2)); // Incremental
   1023   Ops.push_back(Chain);
   1024 
   1025   std::vector<EVT> ResTys;
   1026   ResTys.push_back(MVT::i64); // Type of the write back register
   1027   ResTys.push_back(MVT::Untyped);
   1028   ResTys.push_back(MVT::Other);
   1029 
   1030   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1031 
   1032   // Update uses of write back register
   1033   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
   1034 
   1035   // Update uses of vector list
   1036   SDValue SuperReg = SDValue(Ld, 1);
   1037   if (NumVecs == 1)
   1038     ReplaceUses(SDValue(N, 0), SuperReg);
   1039   else
   1040     for (unsigned i = 0; i < NumVecs; ++i)
   1041       ReplaceUses(SDValue(N, i),
   1042           CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
   1043 
   1044   // Update the chain
   1045   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
   1046   return nullptr;
   1047 }
   1048 
   1049 SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
   1050                                          unsigned Opc) {
   1051   SDLoc dl(N);
   1052   EVT VT = N->getOperand(2)->getValueType(0);
   1053 
   1054   // Form a REG_SEQUENCE to force register allocation.
   1055   bool Is128Bit = VT.getSizeInBits() == 128;
   1056   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1057   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
   1058 
   1059   SmallVector<SDValue, 6> Ops;
   1060   Ops.push_back(RegSeq);
   1061   Ops.push_back(N->getOperand(NumVecs + 2));
   1062   Ops.push_back(N->getOperand(0));
   1063   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
   1064 
   1065   return St;
   1066 }
   1067 
   1068 SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
   1069                                              unsigned Opc) {
   1070   SDLoc dl(N);
   1071   EVT VT = N->getOperand(2)->getValueType(0);
   1072   SmallVector<EVT, 2> ResTys;
   1073   ResTys.push_back(MVT::i64);   // Type of the write back register
   1074   ResTys.push_back(MVT::Other); // Type for the Chain
   1075 
   1076   // Form a REG_SEQUENCE to force register allocation.
   1077   bool Is128Bit = VT.getSizeInBits() == 128;
   1078   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1079   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
   1080 
   1081   SmallVector<SDValue, 6> Ops;
   1082   Ops.push_back(RegSeq);
   1083   Ops.push_back(N->getOperand(NumVecs + 1)); // base register
   1084   Ops.push_back(N->getOperand(NumVecs + 2)); // Incremental
   1085   Ops.push_back(N->getOperand(0)); // Chain
   1086   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1087 
   1088   return St;
   1089 }
   1090 
   1091 /// WidenVector - Given a value in the V64 register class, produce the
   1092 /// equivalent value in the V128 register class.
   1093 class WidenVector {
   1094   SelectionDAG &DAG;
   1095 
   1096 public:
   1097   WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
   1098 
   1099   SDValue operator()(SDValue V64Reg) {
   1100     EVT VT = V64Reg.getValueType();
   1101     unsigned NarrowSize = VT.getVectorNumElements();
   1102     MVT EltTy = VT.getVectorElementType().getSimpleVT();
   1103     MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
   1104     SDLoc DL(V64Reg);
   1105 
   1106     SDValue Undef =
   1107         SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
   1108     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
   1109   }
   1110 };
   1111 
   1112 /// NarrowVector - Given a value in the V128 register class, produce the
   1113 /// equivalent value in the V64 register class.
   1114 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
   1115   EVT VT = V128Reg.getValueType();
   1116   unsigned WideSize = VT.getVectorNumElements();
   1117   MVT EltTy = VT.getVectorElementType().getSimpleVT();
   1118   MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
   1119 
   1120   return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
   1121                                     V128Reg);
   1122 }
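
// Illustrative example (editorial note, not in the original source):
// WidenVector turns a v2i32 into a v4i32 by inserting it as the dsub
// subregister of an IMPLICIT_DEF, and NarrowVector inverts that with an
// EXTRACT_SUBREG of dsub; together they let the lane load/store selection
// below operate uniformly on 128-bit register lists.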
   1123 
   1124 SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
   1125                                             unsigned Opc) {
   1126   SDLoc dl(N);
   1127   EVT VT = N->getValueType(0);
   1128   bool Narrow = VT.getSizeInBits() == 64;
   1129 
   1130   // Form a REG_SEQUENCE to force register allocation.
   1131   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1132 
   1133   if (Narrow)
   1134     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1135                    WidenVector(*CurDAG));
   1136 
   1137   SDValue RegSeq = createQTuple(Regs);
   1138 
   1139   std::vector<EVT> ResTys;
   1140   ResTys.push_back(MVT::Untyped);
   1141   ResTys.push_back(MVT::Other);
   1142 
   1143   unsigned LaneNo =
   1144       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
   1145 
   1146   SmallVector<SDValue, 6> Ops;
   1147   Ops.push_back(RegSeq);
   1148   Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
   1149   Ops.push_back(N->getOperand(NumVecs + 3));
   1150   Ops.push_back(N->getOperand(0));
   1151   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1152   SDValue SuperReg = SDValue(Ld, 0);
   1153 
   1154   EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
   1155   static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
   1156                               AArch64::qsub3 };
   1157   for (unsigned i = 0; i < NumVecs; ++i) {
   1158     SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
   1159     if (Narrow)
   1160       NV = NarrowVector(NV, *CurDAG);
   1161     ReplaceUses(SDValue(N, i), NV);
   1162   }
   1163 
   1164   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
   1165 
   1166   return Ld;
   1167 }
   1168 
   1169 SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
   1170                                                 unsigned Opc) {
   1171   SDLoc dl(N);
   1172   EVT VT = N->getValueType(0);
   1173   bool Narrow = VT.getSizeInBits() == 64;
   1174 
   1175   // Form a REG_SEQUENCE to force register allocation.
   1176   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1177 
   1178   if (Narrow)
   1179     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1180                    WidenVector(*CurDAG));
   1181 
   1182   SDValue RegSeq = createQTuple(Regs);
   1183 
   1184   std::vector<EVT> ResTys;
   1185   ResTys.push_back(MVT::i64); // Type of the write back register
   1186   ResTys.push_back(MVT::Untyped);
   1187   ResTys.push_back(MVT::Other);
   1188 
   1189   unsigned LaneNo =
   1190       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
   1191 
   1192   SmallVector<SDValue, 6> Ops;
   1193   Ops.push_back(RegSeq);
   1194   Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64)); // Lane Number
   1195   Ops.push_back(N->getOperand(NumVecs + 2)); // Base register
   1196   Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
   1197   Ops.push_back(N->getOperand(0));
   1198   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1199 
   1200   // Update uses of the write back register
   1201   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
   1202 
   1203   // Update uses of the vector list
   1204   SDValue SuperReg = SDValue(Ld, 1);
   1205   if (NumVecs == 1) {
   1206     ReplaceUses(SDValue(N, 0),
   1207                 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
   1208   } else {
   1209     EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
   1210     static unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, AArch64::qsub2,
   1211                                 AArch64::qsub3 };
   1212     for (unsigned i = 0; i < NumVecs; ++i) {
   1213       SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
   1214                                                   SuperReg);
   1215       if (Narrow)
   1216         NV = NarrowVector(NV, *CurDAG);
   1217       ReplaceUses(SDValue(N, i), NV);
   1218     }
   1219   }
   1220 
   1221   // Update the Chain
   1222   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
   1223 
   1224   return Ld;
   1225 }
   1226 
   1227 SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
   1228                                              unsigned Opc) {
   1229   SDLoc dl(N);
   1230   EVT VT = N->getOperand(2)->getValueType(0);
   1231   bool Narrow = VT.getSizeInBits() == 64;
   1232 
   1233   // Form a REG_SEQUENCE to force register allocation.
   1234   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1235 
   1236   if (Narrow)
   1237     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1238                    WidenVector(*CurDAG));
   1239 
   1240   SDValue RegSeq = createQTuple(Regs);
   1241 
   1242   unsigned LaneNo =
   1243       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
   1244 
   1245   SmallVector<SDValue, 6> Ops;
   1246   Ops.push_back(RegSeq);
   1247   Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
   1248   Ops.push_back(N->getOperand(NumVecs + 3));
   1249   Ops.push_back(N->getOperand(0));
   1250   SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
   1251 
   1252   // Transfer memoperands.
   1253   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1254   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1255   cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   1256 
   1257   return St;
   1258 }
   1259 
   1260 SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
   1261                                                  unsigned Opc) {
   1262   SDLoc dl(N);
   1263   EVT VT = N->getOperand(2)->getValueType(0);
   1264   bool Narrow = VT.getSizeInBits() == 64;
   1265 
   1266   // Form a REG_SEQUENCE to force register allocation.
   1267   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1268 
   1269   if (Narrow)
   1270     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1271                    WidenVector(*CurDAG));
   1272 
   1273   SDValue RegSeq = createQTuple(Regs);
   1274 
   1275   SmallVector<EVT, 2> ResTys;
   1276   ResTys.push_back(MVT::i64);   // Type of the write back register
   1277   ResTys.push_back(MVT::Other);
   1278 
   1279   unsigned LaneNo =
   1280       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
   1281 
   1282   SmallVector<SDValue, 6> Ops;
   1283   Ops.push_back(RegSeq);
   1284   Ops.push_back(CurDAG->getTargetConstant(LaneNo, MVT::i64));
   1285   Ops.push_back(N->getOperand(NumVecs + 2)); // Base Register
   1286   Ops.push_back(N->getOperand(NumVecs + 3)); // Incremental
   1287   Ops.push_back(N->getOperand(0));
   1288   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1289 
   1290   // Transfer memoperands.
   1291   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1292   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1293   cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   1294 
   1295   return St;
   1296 }
   1297 
   1298 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
   1299                                        unsigned &Opc, SDValue &Opd0,
   1300                                        unsigned &LSB, unsigned &MSB,
   1301                                        unsigned NumberOfIgnoredLowBits,
   1302                                        bool BiggerPattern) {
   1303   assert(N->getOpcode() == ISD::AND &&
    1304          "N must be an AND operation to call this function");
   1305 
   1306   EVT VT = N->getValueType(0);
   1307 
    1308   // We could test the type of VT here and return false when it does not
    1309   // match, but since that check is done prior to this call in the current
    1310   // context, we turned it into an assert to avoid redundant code.
   1311   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1312          "Type checking must have been done before calling this function");
   1313 
   1314   // FIXME: simplify-demanded-bits in DAGCombine will probably have
   1315   // changed the AND node to a 32-bit mask operation. We'll have to
   1316   // undo that as part of the transform here if we want to catch all
   1317   // the opportunities.
    1318   // Currently the NumberOfIgnoredLowBits argument helps to recover
    1319   // from these situations when matching the bigger pattern (bitfield insert).
   1320 
   1321   // For unsigned extracts, check for a shift right and mask
   1322   uint64_t And_imm = 0;
   1323   if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
   1324     return false;
   1325 
   1326   const SDNode *Op0 = N->getOperand(0).getNode();
   1327 
   1328   // Because of simplify-demanded-bits in DAGCombine, the mask may have been
   1329   // simplified. Try to undo that
    1330   And_imm |= ((uint64_t)1 << NumberOfIgnoredLowBits) - 1;
   1331 
   1332   // The immediate is a mask of the low bits iff imm & (imm+1) == 0
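           // For example, 0x00ff is such a mask (0xff & 0x100 == 0), whereas
           // 0x0ff0 is not (0x0ff0 & 0x0ff1 != 0).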
   1333   if (And_imm & (And_imm + 1))
   1334     return false;
   1335 
   1336   bool ClampMSB = false;
   1337   uint64_t Srl_imm = 0;
   1338   // Handle the SRL + ANY_EXTEND case.
   1339   if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
   1340       isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
   1341     // Extend the incoming operand of the SRL to 64-bit.
   1342     Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
   1343     // Make sure to clamp the MSB so that we preserve the semantics of the
   1344     // original operations.
   1345     ClampMSB = true;
   1346   } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
   1347              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
   1348                                    Srl_imm)) {
   1349     // If the shift result was truncated, we can still combine them.
   1350     Opd0 = Op0->getOperand(0).getOperand(0);
   1351 
    1352     // Use the type of the SRL node.
   1353     VT = Opd0->getValueType(0);
   1354   } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
   1355     Opd0 = Op0->getOperand(0);
   1356   } else if (BiggerPattern) {
   1357     // Let's pretend a 0 shift right has been performed.
    1358     // The resulting code will be at least as good as the original one,
    1359     // and it may expose more opportunities for the bitfield insert pattern.
   1360     // FIXME: Currently we limit this to the bigger pattern, because
   1361     // some optimizations expect AND and not UBFM
   1362     Opd0 = N->getOperand(0);
   1363   } else
   1364     return false;
   1365 
   1366   assert((BiggerPattern || (Srl_imm > 0 && Srl_imm < VT.getSizeInBits())) &&
   1367          "bad amount in shift node!");
   1368 
   1369   LSB = Srl_imm;
   1370   MSB = Srl_imm + (VT == MVT::i32 ? CountTrailingOnes_32(And_imm)
   1371                                   : CountTrailingOnes_64(And_imm)) -
   1372         1;
   1373   if (ClampMSB)
   1374     // Since we're moving the extend before the right shift operation, we need
   1375     // to clamp the MSB to make sure we don't shift in undefined bits instead of
   1376     // the zeros which would get shifted in with the original right shift
   1377     // operation.
   1378     MSB = MSB > 31 ? 31 : MSB;
   1379 
   1380   Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
   1381   return true;
   1382 }
   1383 
   1384 static bool isOneBitExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
   1385                                      unsigned &LSB, unsigned &MSB) {
    1386   // We are looking for the following pattern which basically extracts a single
    1387   // bit from the source value and places it in the LSB of the destination
    1388   // value; all other bits of the destination value are set to zero:
   1389   //
   1390   // Value2 = AND Value, MaskImm
   1391   // SRL Value2, ShiftImm
   1392   //
   1393   // with MaskImm >> ShiftImm == 1.
   1394   //
   1395   // This gets selected into a single UBFM:
   1396   //
   1397   // UBFM Value, ShiftImm, ShiftImm
   1398   //
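           // For example (assuming i32 and the names above):
           //
           //   t = and x, 0x8
           //   r = srl t, 3     ; MaskImm >> ShiftImm == 0x8 >> 3 == 1
           //
           // becomes UBFM x, 3, 3, which places bit 3 of x in bit 0 of r.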
   1399 
   1400   if (N->getOpcode() != ISD::SRL)
   1401     return false;
   1402 
   1403   uint64_t And_mask = 0;
   1404   if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
   1405     return false;
   1406 
   1407   Opd0 = N->getOperand(0).getOperand(0);
   1408 
   1409   uint64_t Srl_imm = 0;
   1410   if (!isIntImmediate(N->getOperand(1), Srl_imm))
   1411     return false;
   1412 
    1413   // Check whether we really have a one-bit extract here.
   1414   if (And_mask >> Srl_imm == 0x1) {
   1415     if (N->getValueType(0) == MVT::i32)
   1416       Opc = AArch64::UBFMWri;
   1417     else
   1418       Opc = AArch64::UBFMXri;
   1419 
   1420     LSB = MSB = Srl_imm;
   1421 
   1422     return true;
   1423   }
   1424 
   1425   return false;
   1426 }
   1427 
   1428 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
   1429                                        unsigned &LSB, unsigned &MSB,
   1430                                        bool BiggerPattern) {
   1431   assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
   1432          "N must be a SHR/SRA operation to call this function");
   1433 
   1434   EVT VT = N->getValueType(0);
   1435 
    1436   // We could test the type of VT here and return false when it does not
    1437   // match, but since that check is done prior to this call in the current
    1438   // context, we turned it into an assert to avoid redundant code.
   1439   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1440          "Type checking must have been done before calling this function");
   1441 
   1442   // Check for AND + SRL doing a one bit extract.
   1443   if (isOneBitExtractOpFromShr(N, Opc, Opd0, LSB, MSB))
   1444     return true;
   1445 
    1446   // We're looking for a shift of a shift.
   1447   uint64_t Shl_imm = 0;
   1448   uint64_t Trunc_bits = 0;
   1449   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
   1450     Opd0 = N->getOperand(0).getOperand(0);
   1451   } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
   1452              N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    1453     // We are looking for a shift of a truncate. A truncate from i64 to i32
    1454     // can be considered as setting the high 32 bits to zero. Our strategy here
    1455     // is to always generate a 64-bit UBFM. This consistency will help the CSE
    1456     // pass later find more redundancy.
   1457     Opd0 = N->getOperand(0).getOperand(0);
   1458     Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
   1459     VT = Opd0->getValueType(0);
   1460     assert(VT == MVT::i64 && "the promoted type should be i64");
   1461   } else if (BiggerPattern) {
   1462     // Let's pretend a 0 shift left has been performed.
   1463     // FIXME: Currently we limit this to the bigger pattern case,
   1464     // because some optimizations expect AND and not UBFM
   1465     Opd0 = N->getOperand(0);
   1466   } else
   1467     return false;
   1468 
   1469   assert(Shl_imm < VT.getSizeInBits() && "bad amount in shift node!");
   1470   uint64_t Srl_imm = 0;
   1471   if (!isIntImmediate(N->getOperand(1), Srl_imm))
   1472     return false;
   1473 
   1474   assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
   1475          "bad amount in shift node!");
   1476   // Note: The width operand is encoded as width-1.
   1477   unsigned Width = VT.getSizeInBits() - Trunc_bits - Srl_imm - 1;
   1478   int sLSB = Srl_imm - Shl_imm;
   1479   if (sLSB < 0)
   1480     return false;
   1481   LSB = sLSB;
   1482   MSB = LSB + Width;
   1483   // SRA requires a signed extraction
   1484   if (VT == MVT::i32)
   1485     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
   1486   else
   1487     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
   1488   return true;
   1489 }
   1490 
   1491 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
   1492                                 SDValue &Opd0, unsigned &LSB, unsigned &MSB,
   1493                                 unsigned NumberOfIgnoredLowBits = 0,
   1494                                 bool BiggerPattern = false) {
   1495   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
   1496     return false;
   1497 
   1498   switch (N->getOpcode()) {
   1499   default:
   1500     if (!N->isMachineOpcode())
   1501       return false;
   1502     break;
   1503   case ISD::AND:
   1504     return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, LSB, MSB,
   1505                                       NumberOfIgnoredLowBits, BiggerPattern);
   1506   case ISD::SRL:
   1507   case ISD::SRA:
   1508     return isBitfieldExtractOpFromShr(N, Opc, Opd0, LSB, MSB, BiggerPattern);
   1509   }
   1510 
   1511   unsigned NOpc = N->getMachineOpcode();
   1512   switch (NOpc) {
   1513   default:
   1514     return false;
   1515   case AArch64::SBFMWri:
   1516   case AArch64::UBFMWri:
   1517   case AArch64::SBFMXri:
   1518   case AArch64::UBFMXri:
   1519     Opc = NOpc;
   1520     Opd0 = N->getOperand(0);
   1521     LSB = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
   1522     MSB = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
   1523     return true;
   1524   }
   1525   // Unreachable
   1526   return false;
   1527 }
   1528 
   1529 SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
   1530   unsigned Opc, LSB, MSB;
   1531   SDValue Opd0;
   1532   if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, LSB, MSB))
   1533     return nullptr;
   1534 
   1535   EVT VT = N->getValueType(0);
   1536 
    1537   // If the bitfield extract operation is 64-bit but the original type is
    1538   // 32-bit, we need to add an EXTRACT_SUBREG.
   1539   if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
   1540     SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(LSB, MVT::i64),
   1541                        CurDAG->getTargetConstant(MSB, MVT::i64)};
   1542 
   1543     SDNode *BFM = CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i64, Ops64);
   1544     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, MVT::i32);
   1545     MachineSDNode *Node =
   1546         CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, SDLoc(N), MVT::i32,
   1547                                SDValue(BFM, 0), SubReg);
   1548     return Node;
   1549   }
   1550 
   1551   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(LSB, VT),
   1552                    CurDAG->getTargetConstant(MSB, VT)};
   1553   return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   1554 }
   1555 
    1556 /// Does DstMask form a complementary pair with the mask provided by
    1557 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
   1558 /// this asks whether DstMask zeroes precisely those bits that will be set by
   1559 /// the other half.
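         /// For example, with an i32 VT, DstMask == 0xffff0000 pairs with
         /// BitsToBeInserted == 0x0000ffff: the two are disjoint and together
         /// cover every bit that survives NumberOfIgnoredHighBits.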
   1560 static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
   1561                               unsigned NumberOfIgnoredHighBits, EVT VT) {
   1562   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1563          "i32 or i64 mask type expected!");
   1564   unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
   1565 
   1566   APInt SignificantDstMask = APInt(BitWidth, DstMask);
   1567   APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
   1568 
   1569   return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
   1570          (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
   1571 }
   1572 
    1573 // Look for bits that will be useful for later uses.
    1574 // A bit is considered useless as soon as it is dropped and never used
    1575 // before it has been dropped.
    1576 // E.g., looking for the useful bits of x:
    1577 // 1. y = x & 0x7
    1578 // 2. z = y >> 2
    1579 // After #1, the useful bits of x are 0x7; these useful bits of x live
    1580 // through y.
    1581 // After #2, the useful bits of x are 0x4.
    1582 // However, if x is used by an unpredictable instruction, then all its bits
    1583 // are useful, e.g. because of the store at #3 below, which consumes x whole:
    1584 // 1. y = x & 0x7
    1585 // 2. z = y >> 2
    1586 // 3. str x, [@x]
   1588 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
   1589 
   1590 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
   1591                                               unsigned Depth) {
   1592   uint64_t Imm =
   1593       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
   1594   Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
   1595   UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
   1596   getUsefulBits(Op, UsefulBits, Depth + 1);
   1597 }
   1598 
   1599 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
   1600                                              uint64_t Imm, uint64_t MSB,
   1601                                              unsigned Depth) {
    1602   // Inherit the bit width from UsefulBits.
   1603   APInt OpUsefulBits(UsefulBits);
   1604   OpUsefulBits = 1;
   1605 
   1606   if (MSB >= Imm) {
   1607     OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
   1608     --OpUsefulBits;
   1609     // The interesting part will be in the lower part of the result
   1610     getUsefulBits(Op, OpUsefulBits, Depth + 1);
   1611     // The interesting part was starting at Imm in the argument
   1612     OpUsefulBits = OpUsefulBits.shl(Imm);
   1613   } else {
   1614     OpUsefulBits = OpUsefulBits.shl(MSB + 1);
   1615     --OpUsefulBits;
   1616     // The interesting part will be shifted in the result
   1617     OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
   1618     getUsefulBits(Op, OpUsefulBits, Depth + 1);
   1619     // The interesting part was at zero in the argument
   1620     OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
   1621   }
   1622 
   1623   UsefulBits &= OpUsefulBits;
   1624 }
   1625 
   1626 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
   1627                                   unsigned Depth) {
   1628   uint64_t Imm =
   1629       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
   1630   uint64_t MSB =
   1631       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   1632 
   1633   getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
   1634 }
   1635 
   1636 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
   1637                                               unsigned Depth) {
   1638   uint64_t ShiftTypeAndValue =
   1639       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   1640   APInt Mask(UsefulBits);
   1641   Mask.clearAllBits();
   1642   Mask.flipAllBits();
   1643 
   1644   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
   1645     // Shift Left
   1646     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
   1647     Mask = Mask.shl(ShiftAmt);
   1648     getUsefulBits(Op, Mask, Depth + 1);
   1649     Mask = Mask.lshr(ShiftAmt);
   1650   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
   1651     // Shift Right
   1652     // We do not handle AArch64_AM::ASR, because the sign will change the
   1653     // number of useful bits
   1654     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
   1655     Mask = Mask.lshr(ShiftAmt);
   1656     getUsefulBits(Op, Mask, Depth + 1);
   1657     Mask = Mask.shl(ShiftAmt);
   1658   } else
   1659     return;
   1660 
   1661   UsefulBits &= Mask;
   1662 }
   1663 
   1664 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
   1665                                  unsigned Depth) {
   1666   uint64_t Imm =
   1667       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   1668   uint64_t MSB =
   1669       cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
   1670 
   1671   if (Op.getOperand(1) == Orig)
   1672     return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
   1673 
   1674   APInt OpUsefulBits(UsefulBits);
   1675   OpUsefulBits = 1;
   1676 
   1677   if (MSB >= Imm) {
   1678     OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
   1679     --OpUsefulBits;
   1680     UsefulBits &= ~OpUsefulBits;
   1681     getUsefulBits(Op, UsefulBits, Depth + 1);
   1682   } else {
   1683     OpUsefulBits = OpUsefulBits.shl(MSB + 1);
   1684     --OpUsefulBits;
   1685     UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
   1686     getUsefulBits(Op, UsefulBits, Depth + 1);
   1687   }
   1688 }
   1689 
   1690 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
   1691                                 SDValue Orig, unsigned Depth) {
   1692 
   1693   // Users of this node should have already been instruction selected
   1694   // FIXME: Can we turn that into an assert?
   1695   if (!UserNode->isMachineOpcode())
   1696     return;
   1697 
   1698   switch (UserNode->getMachineOpcode()) {
   1699   default:
   1700     return;
   1701   case AArch64::ANDSWri:
   1702   case AArch64::ANDSXri:
   1703   case AArch64::ANDWri:
   1704   case AArch64::ANDXri:
    1705     // We increment Depth only when we call getUsefulBits.
   1706     return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
   1707                                              Depth);
   1708   case AArch64::UBFMWri:
   1709   case AArch64::UBFMXri:
   1710     return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
   1711 
   1712   case AArch64::ORRWrs:
   1713   case AArch64::ORRXrs:
   1714     if (UserNode->getOperand(1) != Orig)
   1715       return;
   1716     return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
   1717                                              Depth);
   1718   case AArch64::BFMWri:
   1719   case AArch64::BFMXri:
   1720     return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
   1721   }
   1722 }
   1723 
   1724 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
   1725   if (Depth >= 6)
   1726     return;
   1727   // Initialize UsefulBits
   1728   if (!Depth) {
   1729     unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
    1730     // At the beginning, assume every produced bit is useful.
   1731     UsefulBits = APInt(Bitwidth, 0);
   1732     UsefulBits.flipAllBits();
   1733   }
   1734   APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
   1735 
   1736   for (SDNode *Node : Op.getNode()->uses()) {
   1737     // A use cannot produce useful bits
   1738     APInt UsefulBitsForUse = APInt(UsefulBits);
   1739     getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
   1740     UsersUsefulBits |= UsefulBitsForUse;
   1741   }
    1742   // UsefulBits contains the produced bits that are meaningful for the
    1743   // current definition; thus a user cannot make a bit meaningful at
    1744   // this point.
   1745   UsefulBits &= UsersUsefulBits;
   1746 }
   1747 
   1748 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
   1749 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
   1750 /// 0, return Op unchanged.
   1751 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
   1752   if (ShlAmount == 0)
   1753     return Op;
   1754 
   1755   EVT VT = Op.getValueType();
   1756   unsigned BitWidth = VT.getSizeInBits();
   1757   unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
   1758 
   1759   SDNode *ShiftNode;
   1760   if (ShlAmount > 0) {
   1761     // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
   1762     ShiftNode = CurDAG->getMachineNode(
   1763         UBFMOpc, SDLoc(Op), VT, Op,
   1764         CurDAG->getTargetConstant(BitWidth - ShlAmount, VT),
   1765         CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, VT));
   1766   } else {
   1767     // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
   1768     assert(ShlAmount < 0 && "expected right shift");
   1769     int ShrAmount = -ShlAmount;
   1770     ShiftNode = CurDAG->getMachineNode(
   1771         UBFMOpc, SDLoc(Op), VT, Op, CurDAG->getTargetConstant(ShrAmount, VT),
   1772         CurDAG->getTargetConstant(BitWidth - 1, VT));
   1773   }
   1774 
   1775   return SDValue(ShiftNode, 0);
   1776 }
   1777 
    1778 /// Does this tree qualify as an attempt to move a bitfield into position,
    1779 /// essentially "(and (shl VAL, N), Mask)"?
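         /// For example, (and (shl x, 4), 0xff0) moves the low byte of x into
         /// bits [11:4]; on success Src is x, ShiftAmount is 4 and MaskWidth is 8.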
   1780 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
   1781                                     SDValue &Src, int &ShiftAmount,
   1782                                     int &MaskWidth) {
   1783   EVT VT = Op.getValueType();
   1784   unsigned BitWidth = VT.getSizeInBits();
   1785   (void)BitWidth;
   1786   assert(BitWidth == 32 || BitWidth == 64);
   1787 
   1788   APInt KnownZero, KnownOne;
   1789   CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
   1790 
   1791   // Non-zero in the sense that they're not provably zero, which is the key
   1792   // point if we want to use this value
   1793   uint64_t NonZeroBits = (~KnownZero).getZExtValue();
   1794 
   1795   // Discard a constant AND mask if present. It's safe because the node will
   1796   // already have been factored into the computeKnownBits calculation above.
   1797   uint64_t AndImm;
   1798   if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
   1799     assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
   1800     Op = Op.getOperand(0);
   1801   }
   1802 
   1803   uint64_t ShlImm;
   1804   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
   1805     return false;
   1806   Op = Op.getOperand(0);
   1807 
   1808   if (!isShiftedMask_64(NonZeroBits))
   1809     return false;
   1810 
   1811   ShiftAmount = countTrailingZeros(NonZeroBits);
   1812   MaskWidth = CountTrailingOnes_64(NonZeroBits >> ShiftAmount);
   1813 
   1814   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   1815   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
   1816   // amount.
   1817   Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
   1818 
   1819   return true;
   1820 }
   1821 
    1822 // Given an OR operation, check if we have the following pattern:
    1823 // ubfm c, b, imm, imm2 (or something that does the same job, see
    1824 //                       isBitfieldExtractOp)
    1825 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
    1826 //                 countTrailingZeros(mask2) == imm2 - imm + 1
    1827 // f = d | c
    1828 // If yes, the given reference arguments will be updated so that one can
    1829 // replace the OR instruction with:
    1830 // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
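         //
         // A concrete i32 instance of this pattern:
         //   c = ubfm b, 16, 23      ; c = (b >> 16) & 0xff
         //   d = and e, 0xffffff00   ; countTrailingZeros(0xffffff00) == 8 == 23 - 16 + 1
         //   f = or d, c
         // which can be selected as f = BFMWri e, b, 16, 23 (i.e. BFXIL f, b, #16, #8).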
   1831 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
   1832                                      SDValue &Src, unsigned &ImmR,
   1833                                      unsigned &ImmS, SelectionDAG *CurDAG) {
    1834   assert(N->getOpcode() == ISD::OR && "Expect an OR operation");
   1835 
   1836   // Set Opc
   1837   EVT VT = N->getValueType(0);
   1838   if (VT == MVT::i32)
   1839     Opc = AArch64::BFMWri;
   1840   else if (VT == MVT::i64)
   1841     Opc = AArch64::BFMXri;
   1842   else
   1843     return false;
   1844 
   1845   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
   1846   // have the expected shape. Try to undo that.
   1847   APInt UsefulBits;
   1848   getUsefulBits(SDValue(N, 0), UsefulBits);
   1849 
   1850   unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
   1851   unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
   1852 
    1853   // OR is commutative; check both possibilities. (Does LLVM provide a
    1854   // way to do that directly, e.g., via a code matcher?)
   1855   SDValue OrOpd1Val = N->getOperand(1);
   1856   SDNode *OrOpd0 = N->getOperand(0).getNode();
   1857   SDNode *OrOpd1 = N->getOperand(1).getNode();
   1858   for (int i = 0; i < 2;
   1859        ++i, std::swap(OrOpd0, OrOpd1), OrOpd1Val = N->getOperand(0)) {
   1860     unsigned BFXOpc;
   1861     int DstLSB, Width;
   1862     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
   1863                             NumberOfIgnoredLowBits, true)) {
   1864       // Check that the returned opcode is compatible with the pattern,
   1865       // i.e., same type and zero extended (U and not S)
   1866       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
   1867           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
   1868         continue;
   1869 
   1870       // Compute the width of the bitfield insertion
   1871       DstLSB = 0;
   1872       Width = ImmS - ImmR + 1;
    1873       // FIXME: This constraint is to catch bitfield insertion; we may
    1874       // want to widen the pattern if we want to grab the general bitfield
    1875       // move case.
   1876       if (Width <= 0)
   1877         continue;
   1878 
   1879       // If the mask on the insertee is correct, we have a BFXIL operation. We
   1880       // can share the ImmR and ImmS values from the already-computed UBFM.
   1881     } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0), Src,
   1882                                        DstLSB, Width)) {
   1883       ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
   1884       ImmS = Width - 1;
   1885     } else
   1886       continue;
   1887 
   1888     // Check the second part of the pattern
   1889     EVT VT = OrOpd1->getValueType(0);
   1890     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
   1891 
    1892     // Compute the known-zero bits for the candidate of the first operand.
    1893     // This allows us to catch more general cases than just looking for an
    1894     // AND with an immediate. Indeed, simplify-demanded-bits may have removed
    1895     // the AND instruction because it proved it was useless.
   1896     APInt KnownZero, KnownOne;
   1897     CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
   1898 
   1899     // Check if there is enough room for the second operand to appear
   1900     // in the first one
   1901     APInt BitsToBeInserted =
   1902         APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
   1903 
   1904     if ((BitsToBeInserted & ~KnownZero) != 0)
   1905       continue;
   1906 
   1907     // Set the first operand
   1908     uint64_t Imm;
   1909     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
   1910         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
   1911       // In that case, we can eliminate the AND
   1912       Dst = OrOpd1->getOperand(0);
   1913     else
   1914       // Maybe the AND has been removed by simplify-demanded-bits
   1915       // or is useful because it discards more bits
   1916       Dst = OrOpd1Val;
   1917 
    1918     // Both parts match.
   1919     return true;
   1920   }
   1921 
   1922   return false;
   1923 }
   1924 
   1925 SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
   1926   if (N->getOpcode() != ISD::OR)
   1927     return nullptr;
   1928 
   1929   unsigned Opc;
   1930   unsigned LSB, MSB;
   1931   SDValue Opd0, Opd1;
   1932 
   1933   if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, CurDAG))
   1934     return nullptr;
   1935 
   1936   EVT VT = N->getValueType(0);
   1937   SDValue Ops[] = { Opd0,
   1938                     Opd1,
   1939                     CurDAG->getTargetConstant(LSB, VT),
   1940                     CurDAG->getTargetConstant(MSB, VT) };
   1941   return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   1942 }
   1943 
   1944 SDNode *AArch64DAGToDAGISel::SelectLIBM(SDNode *N) {
   1945   EVT VT = N->getValueType(0);
   1946   unsigned Variant;
   1947   unsigned Opc;
   1948   unsigned FRINTXOpcs[] = { AArch64::FRINTXSr, AArch64::FRINTXDr };
   1949 
   1950   if (VT == MVT::f32) {
   1951     Variant = 0;
   1952   } else if (VT == MVT::f64) {
   1953     Variant = 1;
   1954   } else
   1955     return nullptr; // Unrecognized argument type. Fall back on default codegen.
   1956 
   1957   // Pick the FRINTX variant needed to set the flags.
   1958   unsigned FRINTXOpc = FRINTXOpcs[Variant];
   1959 
   1960   switch (N->getOpcode()) {
   1961   default:
   1962     return nullptr; // Unrecognized libm ISD node. Fall back on default codegen.
   1963   case ISD::FCEIL: {
   1964     unsigned FRINTPOpcs[] = { AArch64::FRINTPSr, AArch64::FRINTPDr };
   1965     Opc = FRINTPOpcs[Variant];
   1966     break;
   1967   }
   1968   case ISD::FFLOOR: {
   1969     unsigned FRINTMOpcs[] = { AArch64::FRINTMSr, AArch64::FRINTMDr };
   1970     Opc = FRINTMOpcs[Variant];
   1971     break;
   1972   }
   1973   case ISD::FTRUNC: {
   1974     unsigned FRINTZOpcs[] = { AArch64::FRINTZSr, AArch64::FRINTZDr };
   1975     Opc = FRINTZOpcs[Variant];
   1976     break;
   1977   }
   1978   case ISD::FROUND: {
   1979     unsigned FRINTAOpcs[] = { AArch64::FRINTASr, AArch64::FRINTADr };
   1980     Opc = FRINTAOpcs[Variant];
   1981     break;
   1982   }
   1983   }
   1984 
   1985   SDLoc dl(N);
   1986   SDValue In = N->getOperand(0);
   1987   SmallVector<SDValue, 2> Ops;
   1988   Ops.push_back(In);
   1989 
   1990   if (!TM.Options.UnsafeFPMath) {
   1991     SDNode *FRINTX = CurDAG->getMachineNode(FRINTXOpc, dl, VT, MVT::Glue, In);
   1992     Ops.push_back(SDValue(FRINTX, 1));
   1993   }
   1994 
   1995   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
   1996 }
   1997 
   1998 bool
   1999 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
   2000                                               unsigned RegWidth) {
   2001   APFloat FVal(0.0);
   2002   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
   2003     FVal = CN->getValueAPF();
   2004   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
   2005     // Some otherwise illegal constants are allowed in this case.
   2006     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
   2007         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
   2008       return false;
   2009 
   2010     ConstantPoolSDNode *CN =
   2011         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
   2012     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
   2013   } else
   2014     return false;
   2015 
   2016   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
   2017   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
   2018   // x-register.
   2019   //
   2020   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
   2021   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
   2022   // integers.
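           //
           // For example, (fp_to_uint (fmul Val, 256.0)) targeting a w-register
           // gives IntVal == 2^8, so FBits == 8 and the fixed-point conversion
           // can use #8 fractional bits.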
   2023   bool IsExact;
   2024 
    2025   // fbits is between 1 and 64 in the worst case, which means the fmul
    2026   // could have 2^64 as an actual operand. Need 65 bits of precision.
   2027   APSInt IntVal(65, true);
   2028   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
   2029 
   2030   // N.b. isPowerOf2 also checks for > 0.
   2031   if (!IsExact || !IntVal.isPowerOf2()) return false;
   2032   unsigned FBits = IntVal.logBase2();
   2033 
   2034   // Checks above should have guaranteed that we haven't lost information in
   2035   // finding FBits, but it must still be in range.
   2036   if (FBits == 0 || FBits > RegWidth) return false;
   2037 
   2038   FixedPos = CurDAG->getTargetConstant(FBits, MVT::i32);
   2039   return true;
   2040 }
   2041 
   2042 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   2043   // Dump information about the Node being selected
   2044   DEBUG(errs() << "Selecting: ");
   2045   DEBUG(Node->dump(CurDAG));
   2046   DEBUG(errs() << "\n");
   2047 
   2048   // If we have a custom node, we already have selected!
   2049   if (Node->isMachineOpcode()) {
   2050     DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
   2051     Node->setNodeId(-1);
   2052     return nullptr;
   2053   }
   2054 
    2055   // A few custom selection cases.
   2056   SDNode *ResNode = nullptr;
   2057   EVT VT = Node->getValueType(0);
   2058 
   2059   switch (Node->getOpcode()) {
   2060   default:
   2061     break;
   2062 
   2063   case ISD::ADD:
   2064     if (SDNode *I = SelectMLAV64LaneV128(Node))
   2065       return I;
   2066     break;
   2067 
   2068   case ISD::LOAD: {
   2069     // Try to select as an indexed load. Fall through to normal processing
   2070     // if we can't.
   2071     bool Done = false;
   2072     SDNode *I = SelectIndexedLoad(Node, Done);
   2073     if (Done)
   2074       return I;
   2075     break;
   2076   }
   2077 
   2078   case ISD::SRL:
   2079   case ISD::AND:
   2080   case ISD::SRA:
   2081     if (SDNode *I = SelectBitfieldExtractOp(Node))
   2082       return I;
   2083     break;
   2084 
   2085   case ISD::OR:
   2086     if (SDNode *I = SelectBitfieldInsertOp(Node))
   2087       return I;
   2088     break;
   2089 
   2090   case ISD::EXTRACT_VECTOR_ELT: {
   2091     // Extracting lane zero is a special case where we can just use a plain
   2092     // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
    2093     // the rest of the compiler, especially the register allocator and copy
    2094     // propagation, to reason about, so is preferred when it's possible to
   2095     // use it.
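             // For example, extracting lane 0 of a v2f64 becomes a plain
             // EXTRACT_SUBREG of dsub (a copy of the low 64 bits).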
   2096     ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
   2097     // Bail and use the default Select() for non-zero lanes.
   2098     if (LaneNode->getZExtValue() != 0)
   2099       break;
   2100     // If the element type is not the same as the result type, likewise
   2101     // bail and use the default Select(), as there's more to do than just
   2102     // a cross-class COPY. This catches extracts of i8 and i16 elements
   2103     // since they will need an explicit zext.
   2104     if (VT != Node->getOperand(0).getValueType().getVectorElementType())
   2105       break;
   2106     unsigned SubReg;
   2107     switch (Node->getOperand(0)
   2108                 .getValueType()
   2109                 .getVectorElementType()
   2110                 .getSizeInBits()) {
   2111     default:
   2112       llvm_unreachable("Unexpected vector element type!");
   2113     case 64:
   2114       SubReg = AArch64::dsub;
   2115       break;
   2116     case 32:
   2117       SubReg = AArch64::ssub;
   2118       break;
   2119     case 16: // FALLTHROUGH
   2120     case 8:
   2121       llvm_unreachable("unexpected zext-requiring extract element!");
   2122     }
   2123     SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
   2124                                                      Node->getOperand(0));
   2125     DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
   2126     DEBUG(Extract->dumpr(CurDAG));
   2127     DEBUG(dbgs() << "\n");
   2128     return Extract.getNode();
   2129   }
   2130   case ISD::Constant: {
   2131     // Materialize zero constants as copies from WZR/XZR.  This allows
   2132     // the coalescer to propagate these into other instructions.
   2133     ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
   2134     if (ConstNode->isNullValue()) {
   2135       if (VT == MVT::i32)
   2136         return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
   2137                                       AArch64::WZR, MVT::i32).getNode();
   2138       else if (VT == MVT::i64)
   2139         return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
   2140                                       AArch64::XZR, MVT::i64).getNode();
   2141     }
   2142     break;
   2143   }
   2144 
   2145   case ISD::FrameIndex: {
   2146     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
   2147     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
   2148     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
   2149     const TargetLowering *TLI = getTargetLowering();
   2150     SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy());
   2151     SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32),
   2152                       CurDAG->getTargetConstant(Shifter, MVT::i32) };
   2153     return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
   2154   }
   2155   case ISD::INTRINSIC_W_CHAIN: {
   2156     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
   2157     switch (IntNo) {
   2158     default:
   2159       break;
   2160     case Intrinsic::aarch64_ldaxp:
   2161     case Intrinsic::aarch64_ldxp: {
   2162       unsigned Op =
   2163           IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
   2164       SDValue MemAddr = Node->getOperand(2);
   2165       SDLoc DL(Node);
   2166       SDValue Chain = Node->getOperand(0);
   2167 
   2168       SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
   2169                                           MVT::Other, MemAddr, Chain);
   2170 
   2171       // Transfer memoperands.
   2172       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2173       MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
   2174       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
   2175       return Ld;
   2176     }
   2177     case Intrinsic::aarch64_stlxp:
   2178     case Intrinsic::aarch64_stxp: {
   2179       unsigned Op =
   2180           IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
   2181       SDLoc DL(Node);
   2182       SDValue Chain = Node->getOperand(0);
   2183       SDValue ValLo = Node->getOperand(2);
   2184       SDValue ValHi = Node->getOperand(3);
   2185       SDValue MemAddr = Node->getOperand(4);
   2186 
   2187       // Place arguments in the right order.
   2188       SmallVector<SDValue, 7> Ops;
   2189       Ops.push_back(ValLo);
   2190       Ops.push_back(ValHi);
   2191       Ops.push_back(MemAddr);
   2192       Ops.push_back(Chain);
   2193 
   2194       SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
   2195       // Transfer memoperands.
   2196       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2197       MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
   2198       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   2199 
   2200       return St;
   2201     }
   2202     case Intrinsic::aarch64_neon_ld1x2:
   2203       if (VT == MVT::v8i8)
   2204         return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
   2205       else if (VT == MVT::v16i8)
   2206         return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
   2207       else if (VT == MVT::v4i16)
   2208         return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
   2209       else if (VT == MVT::v8i16)
   2210         return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
   2211       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2212         return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
   2213       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2214         return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
   2215       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2216         return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
   2217       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2218         return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
   2219       break;
   2220     case Intrinsic::aarch64_neon_ld1x3:
   2221       if (VT == MVT::v8i8)
   2222         return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
   2223       else if (VT == MVT::v16i8)
   2224         return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
   2225       else if (VT == MVT::v4i16)
   2226         return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
   2227       else if (VT == MVT::v8i16)
   2228         return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
   2229       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2230         return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
   2231       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2232         return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
   2233       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2234         return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
   2235       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2236         return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
   2237       break;
   2238     case Intrinsic::aarch64_neon_ld1x4:
   2239       if (VT == MVT::v8i8)
   2240         return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
   2241       else if (VT == MVT::v16i8)
   2242         return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
   2243       else if (VT == MVT::v4i16)
   2244         return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
   2245       else if (VT == MVT::v8i16)
   2246         return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
   2247       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2248         return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
   2249       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2250         return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
   2251       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2252         return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
   2253       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2254         return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
   2255       break;
   2256     case Intrinsic::aarch64_neon_ld2:
   2257       if (VT == MVT::v8i8)
   2258         return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
   2259       else if (VT == MVT::v16i8)
   2260         return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
   2261       else if (VT == MVT::v4i16)
   2262         return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
   2263       else if (VT == MVT::v8i16)
   2264         return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
   2265       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2266         return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
   2267       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2268         return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
   2269       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2270         return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
   2271       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2272         return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
   2273       break;
   2274     case Intrinsic::aarch64_neon_ld3:
   2275       if (VT == MVT::v8i8)
   2276         return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
   2277       else if (VT == MVT::v16i8)
   2278         return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
   2279       else if (VT == MVT::v4i16)
   2280         return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
   2281       else if (VT == MVT::v8i16)
   2282         return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
   2283       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2284         return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
   2285       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2286         return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
   2287       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2288         return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
   2289       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2290         return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
   2291       break;
   2292     case Intrinsic::aarch64_neon_ld4:
   2293       if (VT == MVT::v8i8)
   2294         return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
   2295       else if (VT == MVT::v16i8)
   2296         return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
   2297       else if (VT == MVT::v4i16)
   2298         return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
   2299       else if (VT == MVT::v8i16)
   2300         return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
   2301       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2302         return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
   2303       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2304         return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
   2305       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2306         return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
   2307       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2308         return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
   2309       break;
   2310     case Intrinsic::aarch64_neon_ld2r:
   2311       if (VT == MVT::v8i8)
   2312         return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
   2313       else if (VT == MVT::v16i8)
   2314         return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
   2315       else if (VT == MVT::v4i16)
   2316         return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
   2317       else if (VT == MVT::v8i16)
   2318         return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
   2319       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2320         return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
   2321       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2322         return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
   2323       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2324         return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
   2325       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2326         return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
   2327       break;
   2328     case Intrinsic::aarch64_neon_ld3r:
   2329       if (VT == MVT::v8i8)
   2330         return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
   2331       else if (VT == MVT::v16i8)
   2332         return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
   2333       else if (VT == MVT::v4i16)
   2334         return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
   2335       else if (VT == MVT::v8i16)
   2336         return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
   2337       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2338         return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
   2339       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2340         return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
   2341       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2342         return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
   2343       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2344         return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
   2345       break;
   2346     case Intrinsic::aarch64_neon_ld4r:
   2347       if (VT == MVT::v8i8)
   2348         return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
   2349       else if (VT == MVT::v16i8)
   2350         return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
   2351       else if (VT == MVT::v4i16)
   2352         return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
   2353       else if (VT == MVT::v8i16)
   2354         return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
   2355       else if (VT == MVT::v2i32 || VT == MVT::v2f32)
   2356         return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
   2357       else if (VT == MVT::v4i32 || VT == MVT::v4f32)
   2358         return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
   2359       else if (VT == MVT::v1i64 || VT == MVT::v1f64)
   2360         return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
   2361       else if (VT == MVT::v2i64 || VT == MVT::v2f64)
   2362         return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
   2363       break;
   2364     case Intrinsic::aarch64_neon_ld2lane:
   2365       if (VT == MVT::v16i8 || VT == MVT::v8i8)
   2366         return SelectLoadLane(Node, 2, AArch64::LD2i8);
   2367       else if (VT == MVT::v8i16 || VT == MVT::v4i16)
   2368         return SelectLoadLane(Node, 2, AArch64::LD2i16);
   2369       else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   2370                VT == MVT::v2f32)
   2371         return SelectLoadLane(Node, 2, AArch64::LD2i32);
   2372       else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   2373                VT == MVT::v1f64)
   2374         return SelectLoadLane(Node, 2, AArch64::LD2i64);
   2375       break;
   2376     case Intrinsic::aarch64_neon_ld3lane:
   2377       if (VT == MVT::v16i8 || VT == MVT::v8i8)
   2378         return SelectLoadLane(Node, 3, AArch64::LD3i8);
   2379       else if (VT == MVT::v8i16 || VT == MVT::v4i16)
   2380         return SelectLoadLane(Node, 3, AArch64::LD3i16);
   2381       else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   2382                VT == MVT::v2f32)
   2383         return SelectLoadLane(Node, 3, AArch64::LD3i32);
   2384       else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   2385                VT == MVT::v1f64)
   2386         return SelectLoadLane(Node, 3, AArch64::LD3i64);
   2387       break;
   2388     case Intrinsic::aarch64_neon_ld4lane:
   2389       if (VT == MVT::v16i8 || VT == MVT::v8i8)
   2390         return SelectLoadLane(Node, 4, AArch64::LD4i8);
   2391       else if (VT == MVT::v8i16 || VT == MVT::v4i16)
   2392         return SelectLoadLane(Node, 4, AArch64::LD4i16);
   2393       else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   2394                VT == MVT::v2f32)
   2395         return SelectLoadLane(Node, 4, AArch64::LD4i32);
   2396       else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   2397                VT == MVT::v1f64)
   2398         return SelectLoadLane(Node, 4, AArch64::LD4i64);
   2399       break;
   2400     }
   2401   } break;
   2402   case ISD::INTRINSIC_WO_CHAIN: {
   2403     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
   2404     switch (IntNo) {
   2405     default:
   2406       break;
   2407     case Intrinsic::aarch64_neon_tbl2:
   2408       return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
   2409                                                   : AArch64::TBLv16i8Two,
   2410                          false);
   2411     case Intrinsic::aarch64_neon_tbl3:
   2412       return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
   2413                                                   : AArch64::TBLv16i8Three,
   2414                          false);
   2415     case Intrinsic::aarch64_neon_tbl4:
   2416       return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
   2417                                                   : AArch64::TBLv16i8Four,
   2418                          false);
   2419     case Intrinsic::aarch64_neon_tbx2:
   2420       return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
   2421                                                   : AArch64::TBXv16i8Two,
   2422                          true);
   2423     case Intrinsic::aarch64_neon_tbx3:
   2424       return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
   2425                                                   : AArch64::TBXv16i8Three,
   2426                          true);
   2427     case Intrinsic::aarch64_neon_tbx4:
   2428       return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
   2429                                                   : AArch64::TBXv16i8Four,
   2430                          true);
   2431     case Intrinsic::aarch64_neon_smull:
   2432     case Intrinsic::aarch64_neon_umull:
   2433       if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
   2434         return N;
   2435       break;
   2436     }
   2437     break;
   2438   }
   2439   case ISD::INTRINSIC_VOID: {
   2440     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
   2441     if (Node->getNumOperands() >= 3)
   2442       VT = Node->getOperand(2)->getValueType(0);
   2443     switch (IntNo) {
   2444     default:
   2445       break;
    case Intrinsic::aarch64_neon_st1x2: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 2, AArch64::ST1Twov8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 2, AArch64::ST1Twov16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 2, AArch64::ST1Twov4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 2, AArch64::ST1Twov8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 2, AArch64::ST1Twov2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 2, AArch64::ST1Twov4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 2, AArch64::ST1Twov2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 2, AArch64::ST1Twov1d);
      break;
    }
    case Intrinsic::aarch64_neon_st1x3: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 3, AArch64::ST1Threev8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 3, AArch64::ST1Threev16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 3, AArch64::ST1Threev4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 3, AArch64::ST1Threev8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 3, AArch64::ST1Threev2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 3, AArch64::ST1Threev4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 3, AArch64::ST1Threev2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 3, AArch64::ST1Threev1d);
      break;
    }
    case Intrinsic::aarch64_neon_st1x4: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 4, AArch64::ST1Fourv8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 4, AArch64::ST1Fourv16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 4, AArch64::ST1Fourv4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 4, AArch64::ST1Fourv8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 4, AArch64::ST1Fourv2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 4, AArch64::ST1Fourv4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 4, AArch64::ST1Fourv2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
      break;
    }
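    // st2/st3/st4 are the interleaving structure stores. The v1i64/v1f64
    // cases deliberately fall back to the ST1 multi-register forms below:
    // ST2/ST3/ST4 have no ".1d" arrangement, and with a single element per
    // register interleaving is a no-op, so storing the register tuple with
    // ST1 is equivalent.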
    case Intrinsic::aarch64_neon_st2: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 2, AArch64::ST2Twov8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 2, AArch64::ST2Twov16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 2, AArch64::ST2Twov4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 2, AArch64::ST2Twov8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 2, AArch64::ST2Twov2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 2, AArch64::ST2Twov4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 2, AArch64::ST2Twov2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 2, AArch64::ST1Twov1d);
      break;
    }
    case Intrinsic::aarch64_neon_st3: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 3, AArch64::ST3Threev8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 3, AArch64::ST3Threev16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 3, AArch64::ST3Threev4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 3, AArch64::ST3Threev8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 3, AArch64::ST3Threev2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 3, AArch64::ST3Threev4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 3, AArch64::ST3Threev2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 3, AArch64::ST1Threev1d);
      break;
    }
    case Intrinsic::aarch64_neon_st4: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 4, AArch64::ST4Fourv8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 4, AArch64::ST4Fourv16b);
      else if (VT == MVT::v4i16)
        return SelectStore(Node, 4, AArch64::ST4Fourv4h);
      else if (VT == MVT::v8i16)
        return SelectStore(Node, 4, AArch64::ST4Fourv8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 4, AArch64::ST4Fourv2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 4, AArch64::ST4Fourv4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 4, AArch64::ST4Fourv2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
      break;
    }
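    // Lane stores dispatch on element size only: a lane store always writes
    // a single element, so the 64-bit and 128-bit source forms share one
    // opcode (e.g. ST2i8 covers both v8i8 and v16i8).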
    case Intrinsic::aarch64_neon_st2lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 2, AArch64::ST2i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectStoreLane(Node, 2, AArch64::ST2i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 2, AArch64::ST2i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 2, AArch64::ST2i64);
      break;
    }
    case Intrinsic::aarch64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 3, AArch64::ST3i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectStoreLane(Node, 3, AArch64::ST3i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 3, AArch64::ST3i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 3, AArch64::ST3i64);
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 4, AArch64::ST4i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16)
        return SelectStoreLane(Node, 4, AArch64::ST4i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 4, AArch64::ST4i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 4, AArch64::ST4i64);
      break;
    }
    }
    break;
  }
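  // Post-incremented structure loads. The AArch64ISD::LD*post nodes are
  // created during target DAG combining when a structure load's address is
  // also post-incremented. The subregister index passed to SelectPostLoad
  // (AArch64::dsub0 for 64-bit results, AArch64::qsub0 for 128-bit ones)
  // says how the individual vectors are extracted from the register-tuple
  // value the instruction defines. As with the stores above, v1i64/v1f64
  // fall back to the LD1 multi-register forms.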
  case AArch64ISD::LD2post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
    break;
  }
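  // Post-incremented load-and-replicate: the LD*DUPpost nodes select the
  // LD1R..LD4R instructions, which read one element per result register and
  // broadcast it to every lane of that register.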
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
    break;
  }
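  // Post-incremented single-lane loads. As with the lane stores, the opcode
  // depends only on the element size; lanes other than the one being loaded
  // are preserved, so the existing vector value feeds these nodes as an
  // operand.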
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
    break;
  }
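  // Post-incremented structure stores. Store nodes produce only a chain and
  // the updated base register, not a vector value, so each case below
  // re-reads the vector type from operand 1 (the first stored value) before
  // dispatching on it.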
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
    else if (VT == MVT::v4i16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
    else if (VT == MVT::v8i16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
    break;
  }
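  // Post-incremented single-lane stores; as with the other lane forms, the
  // opcode is chosen by element size alone.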
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    break;
  }

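  // Floating-point rounding nodes get one chance at custom selection via
  // SelectLIBM before falling back to the generated matcher.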
  case ISD::FCEIL:
  case ISD::FFLOOR:
  case ISD::FTRUNC:
  case ISD::FROUND:
    if (SDNode *I = SelectLIBM(Node))
      return I;
    break;
  }

  // Nothing above matched: fall back to the TableGen-generated matcher for
  // the default instruction selection.
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == nullptr || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}
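// A minimal sketch of how this factory is typically wired into the backend
// pass pipeline (assuming the usual AArch64PassConfig::addInstSelector hook
// in AArch64TargetMachine.cpp; this snippet is illustrative, not part of
// this file):
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }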