//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

  bool ForCodeSize;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
        ForCodeSize(false) {}

  const char *getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    ForCodeSize = MF.getFunction()->optForSize();
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  SDNode *Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  SDNode *SelectMLAV64LaneV128(SDNode *N);
  SDNode *SelectMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that element
  /// is returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  SDNode *SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  SDNode *SelectIndexedLoad(SDNode *N, bool &Done);

  SDNode *SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                     unsigned SubRegIdx);
  SDNode *SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                         unsigned SubRegIdx);
  SDNode *SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);

  SDNode *SelectBitfieldExtractOp(SDNode *N);
  SDNode *SelectBitfieldInsertOp(SDNode *N);
  SDNode *SelectBitfieldInsertInZeroOp(SDNode *N);

  SDNode *SelectReadRegister(SDNode *N);
  SDNode *SelectWriteRegister(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
};
} // end anonymous namespace

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so, Imm will receive the zero-extended value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if the operand is a constant.
// If so, Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand. If so, Imm will
// receive the value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
  switch(ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
  case InlineAsm::Constraint_m:
  case InlineAsm::Constraint_Q:
    // Require the address to be in a register.  That is safe for all AArch64
    // variants and it is hard to do anything much smarter without knowing
    // how the operand is used.
    OutOps.push_back(Op);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12.  If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
  unsigned ShiftAmt;

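  // The immediate must be a 12-bit value, optionally shifted left by 12:
  // e.g. 0xabc selects as (#0xabc, LSL #0) and 0xabc000 as (#0xabc, LSL #12).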
  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

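  // Negate in the operand's width; this lets, e.g., an (add Wd, Wn, #-8) be
  // selected as the equivalent (sub Wd, Wn, #8).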
  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// \brief Determine whether it is worth folding V into an extended register.
bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
  // It hurts if the value is used at least twice, unless we are optimizing
  // for code size.
  if (ForCodeSize || V.hasOneUse())
    return true;
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand.  If the value
/// is not shifted, set the Shift operand to the default of "LSL #0".  The
/// logical instructions allow the shifted register to be rotated, but the
/// arithmetic instructions do not.  The AllowROR parameter specifies whether
/// ROR is supported.
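/// For example, (or x0, (shl x1, #3)) can fold into ORR xd, x0, x1, LSL #3
/// rather than a separate shift plus ORR.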
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueType().getSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFolding(N);
  }

  return false;
}

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
      DL->getOpcode() != AArch64ISD::DUPLANE32)
    return false;

  SDValue SV = DL->getOperand(0);
  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    return false;

  SDValue EV = SV.getOperand(1);
  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    return false;

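  // The overall lane number is the DUPLANE index plus the index at which the
  // subvector was extracted, i.e. the lane's position in the full vector.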
  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
  LaneOp = EV.getOperand(0);

  return true;
}

// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
// high lane extract.
static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
                             SDValue &LaneOp, int &LaneIdx) {

  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    std::swap(Op0, Op1);
    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
      return false;
  }
  StdOp = Op1;
  return true;
}

/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
/// is a lane in the upper half of a 128-bit vector.  Recognize and select this
/// so that we don't emit unnecessary lane extracts.
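/// For example, an MLA whose multiplicand is a DUPLANE of the upper half of a
/// 128-bit vector can use the lane-indexed MLA directly, with no extract.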
SDNode *AArch64DAGToDAGISel::SelectMLAV64LaneV128(SDNode *N) {
  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
  int LaneIdx = -1; // Will hold the lane index.

  if (Op1.getOpcode() != ISD::MUL ||
      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                        LaneIdx)) {
    std::swap(Op0, Op1);
    if (Op1.getOpcode() != ISD::MUL ||
        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
                          LaneIdx))
      return nullptr;
  }

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };

  unsigned MLAOpc = ~0U;

  switch (N->getSimpleValueType(0).SimpleTy) {
  default:
    llvm_unreachable("Unrecognized MLA.");
  case MVT::v4i16:
    MLAOpc = AArch64::MLAv4i16_indexed;
    break;
  case MVT::v8i16:
    MLAOpc = AArch64::MLAv8i16_indexed;
    break;
  case MVT::v2i32:
    MLAOpc = AArch64::MLAv2i32_indexed;
    break;
  case MVT::v4i32:
    MLAOpc = AArch64::MLAv4i32_indexed;
    break;
  }

  return CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops);
}

SDNode *AArch64DAGToDAGISel::SelectMULLV64LaneV128(unsigned IntNo, SDNode *N) {
  SDLoc dl(N);
  SDValue SMULLOp0;
  SDValue SMULLOp1;
  int LaneIdx;

  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
                        LaneIdx))
    return nullptr;

  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);

  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };

  unsigned SMULLOpc = ~0U;

  if (IntNo == Intrinsic::aarch64_neon_smull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized SMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::SMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::SMULLv2i32_indexed;
      break;
    }
  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    switch (N->getSimpleValueType(0).SimpleTy) {
    default:
      llvm_unreachable("Unrecognized UMULL.");
    case MVT::v4i32:
      SMULLOpc = AArch64::UMULLv4i16_indexed;
      break;
    case MVT::v2i64:
      SMULLOpc = AArch64::UMULLv2i32_indexed;
      break;
    }
  } else
    llvm_unreachable("Unrecognized intrinsic.");

  return CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops);
}

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                               dl, MVT::i32, N, SubReg);
  return SDValue(Node, 0);
}

/// SelectArithExtendedRegister - Select an "extended register" operand.  This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from.  Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program.  We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFolding(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (cast<MemSDNode>(Use)->getOrdering() > Monotonic)
      return false;
  }

  return true;
}

/// SelectAddrModeIndexed7S - Select a "register plus scaled signed 7-bit
/// immediate" address.  The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
bool AArch64DAGToDAGISel::SelectAddrModeIndexed7S(SDValue N, unsigned Size,
                                                  SDValue &Base,
                                                  SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  // As opposed to the (12-bit) Indexed addressing mode below, the 7-bit signed
  // mode selected here doesn't support labels/immediates, only base+offset.
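  // For Size == 8 (e.g. an STP of X registers), the legal offsets are the
  // multiples of 8 in [-512, 504].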

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = RHS->getSExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= -(0x40 << Scale) &&
          RHSC < (0x40 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    stp x1, x2, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
/// immediate" address.  The "Size" argument is the size in bytes of the memory
/// reference, which determines the scale.
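/// For Size == 8, the legal offsets are the multiples of 8 in [0, 32760].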
bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
                                              SDValue &Base, SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    GlobalAddressSDNode *GAN =
        dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    Base = N.getOperand(0);
    OffImm = N.getOperand(1);
    if (!GAN)
      return true;

    const GlobalValue *GV = GAN->getGlobal();
    unsigned Alignment = GV->getAlignment();
    Type *Ty = GV->getType()->getElementType();
    if (Alignment == 0 && Ty->isSized())
      Alignment = DL.getABITypeAlignment(Ty);

    if (Alignment >= Size)
      return true;
  }

  if (CurDAG->isBaseWithConstantOffset(N)) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t RHSC = (int64_t)RHS->getZExtValue();
      unsigned Scale = Log2_32(Size);
      if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
        Base = N.getOperand(0);
        if (Base.getOpcode() == ISD::FrameIndex) {
          int FI = cast<FrameIndexSDNode>(Base)->getIndex();
          Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
        }
        OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
        return true;
      }
    }
  }

  // Before falling back to our general case, check if the unscaled
  // instructions can handle this. If so, that's preferable.
  if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    return false;

  // Base only. The address will be materialized into a register before
  // the memory is accessed.
  //    add x0, Xbase, #offset
  //    ldr x0, [x0]
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
  return true;
}

/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
/// immediate" address.  This should only match when there is an offset that
/// is not valid for a scaled immediate addressing mode.  The "Size" argument
/// is the size in bytes of the memory reference, which is needed here to know
/// what is valid for a scaled immediate.
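/// The unscaled forms (e.g. LDUR/STUR) accept any byte offset in [-256, 255].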
bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
                                                 SDValue &Base,
                                                 SDValue &OffImm) {
  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int64_t RHSC = RHS->getSExtValue();
    // If the offset is valid as a scaled immediate, don't match here.
    if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
        RHSC < (0x1000 << Log2_32(Size)))
      return false;
    if (RHSC >= -256 && RHSC < 256) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        const TargetLowering *TLI = getTargetLowering();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
      return true;
    }
  }
  return false;
}

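/// Widen - Given an i32 value, produce the equivalent i64: the value is placed
/// in the low 32 bits (sub_32) of an IMPLICIT_DEF, leaving the high 32 bits
/// undefined.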
static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
  SDLoc dl(N);
  SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
  SDValue ImpDef = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
  MachineSDNode *Node = CurDAG->getMachineNode(
      TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
  return SDValue(Node, 0);
}

/// \brief Check if the given SHL node (\p N) can be used to form an
/// extended register for an addressing mode.
bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
                                            bool WantExtend, SDValue &Offset,
                                            SDValue &SignExtend) {
  assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    return false;

  SDLoc dl(N);
  if (WantExtend) {
    AArch64_AM::ShiftExtendType Ext =
        getExtendTypeForNode(N.getOperand(0), true);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
  } else {
    Offset = N.getOperand(0);
    SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
  }

  unsigned LegalShiftVal = Log2_32(Size);
  unsigned ShiftVal = CSD->getZExtValue();

  if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
    return false;

  if (isWorthFolding(N))
    return true;

  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc dl(N);

  // We don't want to match immediate adds here, because they are better lowered
  // to the register-immediate addressing modes.
  if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(LHS))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFolding(RHS))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by a single MOVZ, return true.
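// For example, 0xfff000 is preferred (one "ADD LSL #12", but no single MOVZ),
// while 0x1000 is not, since MOVZ can materialize it directly.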
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation.  If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

    913 
    914   // Watch out if RHS is a wide immediate, it can not be selected into
    915   // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
    916   // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
    917   // instructions like:
    918   //     MOV  X0, WideImmediate
    919   //     ADD  X1, BaseReg, X0
    920   //     LDR  X2, [X1, 0]
    921   // For such situation, using [BaseReg, XReg] addressing mode can save one
    922   // ADD/SUB:
    923   //     MOV  X0, WideImmediate
    924   //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
    unsigned Scale = Log2_32(Size);
    // Skip if the immediate can be selected by a load/store addressing mode,
    // or if it can be encoded by a single ADD (SUB is also checked by using
    // -ImmOff).
    if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFolding(N);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

SDNode *AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  return CurDAG->getMachineNode(Opc, dl, VT, Ops);
}

SDNode *AArch64DAGToDAGISel::SelectIndexedLoad(SDNode *N, bool &Done) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return nullptr;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return nullptr;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = { Base, Offset, Chain };
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);
  // Either way, we're replacing the node, so tell the caller that.
  Done = true;
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

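  // The machine node's results are (writeback, value, chain), whereas the ISD
  // indexed load's are (value, writeback, chain); reorder accordingly.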
  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));

  return nullptr;
}

SDNode *AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs,
                                        unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
        CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  return nullptr;
}

   1154 
   1155 SDNode *AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
   1156                                             unsigned Opc, unsigned SubRegIdx) {
   1157   SDLoc dl(N);
   1158   EVT VT = N->getValueType(0);
   1159   SDValue Chain = N->getOperand(0);
   1160 
   1161   SDValue Ops[] = {N->getOperand(1), // Mem operand
   1162                    N->getOperand(2), // Incremental
   1163                    Chain};
   1164 
   1165   const EVT ResTys[] = {MVT::i64, // Type of the write back register
   1166                         MVT::Untyped, MVT::Other};
   1167 
   1168   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1169 
   1170   // Update uses of write back register
   1171   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
   1172 
   1173   // Update uses of vector list
   1174   SDValue SuperReg = SDValue(Ld, 1);
   1175   if (NumVecs == 1)
   1176     ReplaceUses(SDValue(N, 0), SuperReg);
   1177   else
   1178     for (unsigned i = 0; i < NumVecs; ++i)
   1179       ReplaceUses(SDValue(N, i),
   1180           CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
   1181 
   1182   // Update the chain
   1183   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
   1184   return nullptr;
   1185 }
   1186 
   1187 SDNode *AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
   1188                                          unsigned Opc) {
   1189   SDLoc dl(N);
   1190   EVT VT = N->getOperand(2)->getValueType(0);
   1191 
   1192   // Form a REG_SEQUENCE to force register allocation.
   1193   bool Is128Bit = VT.getSizeInBits() == 128;
   1194   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1195   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
   1196 
   1197   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
   1198   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
   1199 
   1200   return St;
   1201 }
   1202 
   1203 SDNode *AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
   1204                                              unsigned Opc) {
   1205   SDLoc dl(N);
   1206   EVT VT = N->getOperand(2)->getValueType(0);
   1207   const EVT ResTys[] = {MVT::i64,    // Type of the write back register
   1208                         MVT::Other}; // Type for the Chain
   1209 
   1210   // Form a REG_SEQUENCE to force register allocation.
   1211   bool Is128Bit = VT.getSizeInBits() == 128;
   1212   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1213   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
   1214 
   1215   SDValue Ops[] = {RegSeq,
   1216                    N->getOperand(NumVecs + 1), // base register
   1217                    N->getOperand(NumVecs + 2), // Incremental
   1218                    N->getOperand(0)};          // Chain
   1219   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1220 
   1221   return St;
   1222 }
   1223 
   1224 namespace {
   1225 /// WidenVector - Given a value in the V64 register class, produce the
   1226 /// equivalent value in the V128 register class.
   1227 class WidenVector {
   1228   SelectionDAG &DAG;
   1229 
   1230 public:
   1231   WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
   1232 
   1233   SDValue operator()(SDValue V64Reg) {
   1234     EVT VT = V64Reg.getValueType();
   1235     unsigned NarrowSize = VT.getVectorNumElements();
   1236     MVT EltTy = VT.getVectorElementType().getSimpleVT();
   1237     MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
   1238     SDLoc DL(V64Reg);
   1239 
   1240     SDValue Undef =
   1241         SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
   1242     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
   1243   }
   1244 };
   1245 } // namespace
   1246 
   1247 /// NarrowVector - Given a value in the V128 register class, produce the
   1248 /// equivalent value in the V64 register class.
   1249 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
   1250   EVT VT = V128Reg.getValueType();
   1251   unsigned WideSize = VT.getVectorNumElements();
   1252   MVT EltTy = VT.getVectorElementType().getSimpleVT();
   1253   MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
   1254 
   1255   return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
   1256                                     V128Reg);
   1257 }
   1258 
   1259 SDNode *AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
   1260                                             unsigned Opc) {
   1261   SDLoc dl(N);
   1262   EVT VT = N->getValueType(0);
   1263   bool Narrow = VT.getSizeInBits() == 64;
   1264 
   1265   // Form a REG_SEQUENCE to force register allocation.
   1266   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1267 
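           // The lane load instructions operate on lists of 128-bit registers, so
           // any 64-bit input vectors are widened to Q registers first.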
   1268   if (Narrow)
   1269     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1270                    WidenVector(*CurDAG));
   1271 
   1272   SDValue RegSeq = createQTuple(Regs);
   1273 
   1274   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
   1275 
   1276   unsigned LaneNo =
   1277       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
   1278 
   1279   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
   1280                    N->getOperand(NumVecs + 3), N->getOperand(0)};
   1281   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1282   SDValue SuperReg = SDValue(Ld, 0);
   1283 
   1284   EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
   1285   static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
   1286                                     AArch64::qsub2, AArch64::qsub3 };
   1287   for (unsigned i = 0; i < NumVecs; ++i) {
   1288     SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
   1289     if (Narrow)
   1290       NV = NarrowVector(NV, *CurDAG);
   1291     ReplaceUses(SDValue(N, i), NV);
   1292   }
   1293 
   1294   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
   1295 
   1296   return Ld;
   1297 }
   1298 
   1299 SDNode *AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
   1300                                                 unsigned Opc) {
   1301   SDLoc dl(N);
   1302   EVT VT = N->getValueType(0);
   1303   bool Narrow = VT.getSizeInBits() == 64;
   1304 
   1305   // Form a REG_SEQUENCE to force register allocation.
   1306   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1307 
   1308   if (Narrow)
   1309     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1310                    WidenVector(*CurDAG));
   1311 
   1312   SDValue RegSeq = createQTuple(Regs);
   1313 
   1314   const EVT ResTys[] = {MVT::i64, // Type of the write back register
   1315                         RegSeq->getValueType(0), MVT::Other};
   1316 
   1317   unsigned LaneNo =
   1318       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
   1319 
   1320   SDValue Ops[] = {RegSeq,
   1321                    CurDAG->getTargetConstant(LaneNo, dl,
   1322                                              MVT::i64),         // Lane Number
   1323                    N->getOperand(NumVecs + 2),                  // Base register
    1324                    N->getOperand(NumVecs + 3),                  // Increment
   1325                    N->getOperand(0)};
   1326   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1327 
   1328   // Update uses of the write back register
   1329   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
   1330 
   1331   // Update uses of the vector list
   1332   SDValue SuperReg = SDValue(Ld, 1);
   1333   if (NumVecs == 1) {
   1334     ReplaceUses(SDValue(N, 0),
   1335                 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
   1336   } else {
   1337     EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
   1338     static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
   1339                                       AArch64::qsub2, AArch64::qsub3 };
   1340     for (unsigned i = 0; i < NumVecs; ++i) {
   1341       SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
   1342                                                   SuperReg);
   1343       if (Narrow)
   1344         NV = NarrowVector(NV, *CurDAG);
   1345       ReplaceUses(SDValue(N, i), NV);
   1346     }
   1347   }
   1348 
   1349   // Update the Chain
   1350   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
   1351 
   1352   return Ld;
   1353 }
   1354 
   1355 SDNode *AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
   1356                                              unsigned Opc) {
   1357   SDLoc dl(N);
   1358   EVT VT = N->getOperand(2)->getValueType(0);
   1359   bool Narrow = VT.getSizeInBits() == 64;
   1360 
   1361   // Form a REG_SEQUENCE to force register allocation.
   1362   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1363 
   1364   if (Narrow)
   1365     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1366                    WidenVector(*CurDAG));
   1367 
   1368   SDValue RegSeq = createQTuple(Regs);
   1369 
   1370   unsigned LaneNo =
   1371       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
   1372 
   1373   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
   1374                    N->getOperand(NumVecs + 3), N->getOperand(0)};
   1375   SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
   1376 
   1377   // Transfer memoperands.
   1378   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1379   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1380   cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   1381 
   1382   return St;
   1383 }
   1384 
   1385 SDNode *AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
   1386                                                  unsigned Opc) {
   1387   SDLoc dl(N);
   1388   EVT VT = N->getOperand(2)->getValueType(0);
   1389   bool Narrow = VT.getSizeInBits() == 64;
   1390 
   1391   // Form a REG_SEQUENCE to force register allocation.
   1392   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1393 
   1394   if (Narrow)
   1395     std::transform(Regs.begin(), Regs.end(), Regs.begin(),
   1396                    WidenVector(*CurDAG));
   1397 
   1398   SDValue RegSeq = createQTuple(Regs);
   1399 
   1400   const EVT ResTys[] = {MVT::i64, // Type of the write back register
   1401                         MVT::Other};
   1402 
   1403   unsigned LaneNo =
   1404       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
   1405 
   1406   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
   1407                    N->getOperand(NumVecs + 2), // Base Register
    1408                    N->getOperand(NumVecs + 3), // Increment
   1409                    N->getOperand(0)};
   1410   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1411 
   1412   // Transfer memoperands.
   1413   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1414   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1415   cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   1416 
   1417   return St;
   1418 }
   1419 
   1420 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
   1421                                        unsigned &Opc, SDValue &Opd0,
   1422                                        unsigned &LSB, unsigned &MSB,
   1423                                        unsigned NumberOfIgnoredLowBits,
   1424                                        bool BiggerPattern) {
   1425   assert(N->getOpcode() == ISD::AND &&
    1426          "N must be an AND operation to call this function");
   1427 
   1428   EVT VT = N->getValueType(0);
   1429 
    1430   // We could test the type of VT here and return false when it does not
    1431   // match, but since that check is done before this function is called in
    1432   // the current context, we turn it into an assert to avoid redundant code.
   1433   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1434          "Type checking must have been done before calling this function");
   1435 
   1436   // FIXME: simplify-demanded-bits in DAGCombine will probably have
   1437   // changed the AND node to a 32-bit mask operation. We'll have to
   1438   // undo that as part of the transform here if we want to catch all
   1439   // the opportunities.
   1440   // Currently the NumberOfIgnoredLowBits argument helps to recover
    1441   // from these situations when matching a bigger pattern (bitfield insert).
   1442 
   1443   // For unsigned extracts, check for a shift right and mask
   1444   uint64_t And_imm = 0;
   1445   if (!isOpcWithIntImmediate(N, ISD::AND, And_imm))
   1446     return false;
   1447 
   1448   const SDNode *Op0 = N->getOperand(0).getNode();
   1449 
   1450   // Because of simplify-demanded-bits in DAGCombine, the mask may have been
    1451   // simplified. Try to undo that.
    1452   And_imm |= (1ULL << NumberOfIgnoredLowBits) - 1;
   1453 
   1454   // The immediate is a mask of the low bits iff imm & (imm+1) == 0
   1455   if (And_imm & (And_imm + 1))
   1456     return false;
   1457 
   1458   bool ClampMSB = false;
   1459   uint64_t Srl_imm = 0;
   1460   // Handle the SRL + ANY_EXTEND case.
   1461   if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
   1462       isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, Srl_imm)) {
   1463     // Extend the incoming operand of the SRL to 64-bit.
   1464     Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
   1465     // Make sure to clamp the MSB so that we preserve the semantics of the
   1466     // original operations.
   1467     ClampMSB = true;
   1468   } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
   1469              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
   1470                                    Srl_imm)) {
   1471     // If the shift result was truncated, we can still combine them.
   1472     Opd0 = Op0->getOperand(0).getOperand(0);
   1473 
    1474     // Use the type of the SRL node.
   1475     VT = Opd0->getValueType(0);
   1476   } else if (isOpcWithIntImmediate(Op0, ISD::SRL, Srl_imm)) {
   1477     Opd0 = Op0->getOperand(0);
   1478   } else if (BiggerPattern) {
   1479     // Let's pretend a 0 shift right has been performed.
   1480     // The resulting code will be at least as good as the original one
    1481     // plus it may expose more opportunities for the bitfield insert pattern.
   1482     // FIXME: Currently we limit this to the bigger pattern, because
   1483     // some optimizations expect AND and not UBFM.
   1484     Opd0 = N->getOperand(0);
   1485   } else
   1486     return false;
   1487 
   1488   // Bail out on large immediates. This happens when no proper
   1489   // combining/constant folding was performed.
   1490   if (!BiggerPattern && (Srl_imm <= 0 || Srl_imm >= VT.getSizeInBits())) {
   1491     DEBUG((dbgs() << N
   1492            << ": Found large shift immediate, this should not happen\n"));
   1493     return false;
   1494   }
   1495 
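           // The extracted field is [LSB, MSB]: it starts at the shift amount, and
           // its width is the number of trailing ones in the (restored) AND mask.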
   1496   LSB = Srl_imm;
   1497   MSB = Srl_imm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(And_imm)
   1498                                   : countTrailingOnes<uint64_t>(And_imm)) -
   1499         1;
   1500   if (ClampMSB)
   1501     // Since we're moving the extend before the right shift operation, we need
   1502     // to clamp the MSB to make sure we don't shift in undefined bits instead of
   1503     // the zeros which would get shifted in with the original right shift
   1504     // operation.
   1505     MSB = MSB > 31 ? 31 : MSB;
   1506 
   1507   Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
   1508   return true;
   1509 }
   1510 
   1511 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
   1512                                           SDValue &Opd0, unsigned &LSB,
   1513                                           unsigned &MSB) {
    1514   // We are looking for the following pattern, which extracts a contiguous
    1515   // run of bits from the source value and places it at the LSB of the
    1516   // destination value; all other bits of the destination are set to zero:
    1517   //
    1518   // Value2 = AND Value, MaskImm
    1519   // SRL Value2, ShiftImm
    1520   //
    1521   // where MaskImm >> ShiftImm determines the width of the extracted field.
    1522   //
    1523   // This gets selected into a single UBFM:
    1524   //
    1525   // UBFM Value, ShiftImm, BitWide + ShiftImm - 1
    1526   //
   1527 
   1528   if (N->getOpcode() != ISD::SRL)
   1529     return false;
   1530 
   1531   uint64_t And_mask = 0;
   1532   if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_mask))
   1533     return false;
   1534 
   1535   Opd0 = N->getOperand(0).getOperand(0);
   1536 
   1537   uint64_t Srl_imm = 0;
   1538   if (!isIntImmediate(N->getOperand(1), Srl_imm))
   1539     return false;
   1540 
    1541   // Check whether we really have a multi-bit extract here.
   1542   unsigned BitWide = 64 - countLeadingOnes(~(And_mask >> Srl_imm));
   1543   if (BitWide && isMask_64(And_mask >> Srl_imm)) {
   1544     if (N->getValueType(0) == MVT::i32)
   1545       Opc = AArch64::UBFMWri;
   1546     else
   1547       Opc = AArch64::UBFMXri;
   1548 
   1549     LSB = Srl_imm;
   1550     MSB = BitWide + Srl_imm - 1;
   1551     return true;
   1552   }
   1553 
   1554   return false;
   1555 }
   1556 
   1557 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
   1558                                        unsigned &Immr, unsigned &Imms,
   1559                                        bool BiggerPattern) {
   1560   assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
   1561          "N must be a SHR/SRA operation to call this function");
   1562 
   1563   EVT VT = N->getValueType(0);
   1564 
    1565   // We could test the type of VT here and return false when it does not
    1566   // match, but since that check is done before this function is called in
    1567   // the current context, we turn it into an assert to avoid redundant code.
   1568   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1569          "Type checking must have been done before calling this function");
   1570 
   1571   // Check for AND + SRL doing several bits extract.
   1572   if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
   1573     return true;
   1574 
   1575   // we're looking for a shift of a shift
   1576   uint64_t Shl_imm = 0;
   1577   uint64_t Trunc_bits = 0;
   1578   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
   1579     Opd0 = N->getOperand(0).getOperand(0);
   1580   } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
   1581              N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    1582     // We are looking for a shift of a truncate. Truncating from i64 to i32
    1583     // can be considered as setting the high 32 bits to zero. Our strategy
    1584     // here is to always generate a 64-bit UBFM. This consistency will help
    1585     // the CSE pass later find more redundancy.
   1586     Opd0 = N->getOperand(0).getOperand(0);
   1587     Trunc_bits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
   1588     VT = Opd0->getValueType(0);
   1589     assert(VT == MVT::i64 && "the promoted type should be i64");
   1590   } else if (BiggerPattern) {
   1591     // Let's pretend a 0 shift left has been performed.
   1592     // FIXME: Currently we limit this to the bigger pattern case,
   1593     // because some optimizations expect AND and not UBFM
   1594     Opd0 = N->getOperand(0);
   1595   } else
   1596     return false;
   1597 
   1598   // Missing combines/constant folding may have left us with strange
   1599   // constants.
   1600   if (Shl_imm >= VT.getSizeInBits()) {
   1601     DEBUG((dbgs() << N
   1602            << ": Found large shift immediate, this should not happen\n"));
   1603     return false;
   1604   }
   1605 
   1606   uint64_t Srl_imm = 0;
   1607   if (!isIntImmediate(N->getOperand(1), Srl_imm))
   1608     return false;
   1609 
   1610   assert(Srl_imm > 0 && Srl_imm < VT.getSizeInBits() &&
   1611          "bad amount in shift node!");
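           // Fold the left shift into the extract: the rotate amount is
           // (Srl_imm - Shl_imm) modulo the register width, wrapping around when
           // the difference is negative.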
   1612   int immr = Srl_imm - Shl_imm;
   1613   Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
   1614   Imms = VT.getSizeInBits() - Shl_imm - Trunc_bits - 1;
   1615   // SRA requires a signed extraction
   1616   if (VT == MVT::i32)
   1617     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
   1618   else
   1619     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
   1620   return true;
   1621 }
   1622 
   1623 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
   1624                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
   1625                                 unsigned NumberOfIgnoredLowBits = 0,
   1626                                 bool BiggerPattern = false) {
   1627   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
   1628     return false;
   1629 
   1630   switch (N->getOpcode()) {
   1631   default:
   1632     if (!N->isMachineOpcode())
   1633       return false;
   1634     break;
   1635   case ISD::AND:
   1636     return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
   1637                                       NumberOfIgnoredLowBits, BiggerPattern);
   1638   case ISD::SRL:
   1639   case ISD::SRA:
   1640     return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
   1641   }
   1642 
   1643   unsigned NOpc = N->getMachineOpcode();
   1644   switch (NOpc) {
   1645   default:
   1646     return false;
   1647   case AArch64::SBFMWri:
   1648   case AArch64::UBFMWri:
   1649   case AArch64::SBFMXri:
   1650   case AArch64::UBFMXri:
   1651     Opc = NOpc;
   1652     Opd0 = N->getOperand(0);
   1653     Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
   1654     Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
   1655     return true;
   1656   }
   1657   // Unreachable
   1658   return false;
   1659 }
   1660 
   1661 SDNode *AArch64DAGToDAGISel::SelectBitfieldExtractOp(SDNode *N) {
   1662   unsigned Opc, Immr, Imms;
   1663   SDValue Opd0;
   1664   if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
   1665     return nullptr;
   1666 
   1667   EVT VT = N->getValueType(0);
   1668   SDLoc dl(N);
   1669 
   1670   // If the bit extract operation is 64bit but the original type is 32bit, we
   1671   // need to add one EXTRACT_SUBREG.
   1672   if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
   1673     SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
   1674                        CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
   1675 
   1676     SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
   1677     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
   1678     MachineSDNode *Node =
   1679         CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, MVT::i32,
   1680                                SDValue(BFM, 0), SubReg);
   1681     return Node;
   1682   }
   1683 
   1684   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
   1685                    CurDAG->getTargetConstant(Imms, dl, VT)};
   1686   return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   1687 }
   1688 
   1689 /// Does DstMask form a complementary pair with the mask provided by
    1690 /// BitsToBeInserted, suitable for use in a BFI instruction? Roughly speaking,
   1691 /// this asks whether DstMask zeroes precisely those bits that will be set by
   1692 /// the other half.
   1693 static bool isBitfieldDstMask(uint64_t DstMask, APInt BitsToBeInserted,
   1694                               unsigned NumberOfIgnoredHighBits, EVT VT) {
   1695   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1696          "i32 or i64 mask type expected!");
   1697   unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
   1698 
   1699   APInt SignificantDstMask = APInt(BitWidth, DstMask);
   1700   APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
   1701 
   1702   return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
   1703          (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
   1704 }
   1705 
   1706 // Look for bits that will be useful for later uses.
    1707 // A bit is considered useless as soon as it is dropped and never used
    1708 // before it has been dropped.
    1709 // E.g., looking for the useful bits of x:
    1710 // 1. y = x & 0x7
    1711 // 2. z = y >> 2
    1712 // After #1, the useful bits of x are 0x7; they then live on through
    1713 // y.
    1714 // After #2, the useful bits of x are 0x4.
    1715 // However, if x is used by an unpredictable instruction, then all its bits
    1716 // are useful.
   1717 // E.g.
   1718 // 1. y = x & 0x7
   1719 // 2. z = y >> 2
   1720 // 3. str x, [@x]
   1721 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
   1722 
   1723 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
   1724                                               unsigned Depth) {
   1725   uint64_t Imm =
   1726       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
   1727   Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
   1728   UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
   1729   getUsefulBits(Op, UsefulBits, Depth + 1);
   1730 }
   1731 
   1732 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
   1733                                              uint64_t Imm, uint64_t MSB,
   1734                                              unsigned Depth) {
    1735   // Inherit the bit width from UsefulBits.
   1736   APInt OpUsefulBits(UsefulBits);
   1737   OpUsefulBits = 1;
   1738 
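           // MSB >= Imm is the extract-like (UBFX) form, where the field [Imm, MSB]
           // of the operand lands at bit 0 of the result; otherwise it is the
           // insert-like (UBFIZ) form, where the field [0, MSB] of the operand
           // lands at bit (BitWidth - Imm) of the result.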
   1739   if (MSB >= Imm) {
   1740     OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
   1741     --OpUsefulBits;
   1742     // The interesting part will be in the lower part of the result
   1743     getUsefulBits(Op, OpUsefulBits, Depth + 1);
   1744     // The interesting part was starting at Imm in the argument
   1745     OpUsefulBits = OpUsefulBits.shl(Imm);
   1746   } else {
   1747     OpUsefulBits = OpUsefulBits.shl(MSB + 1);
   1748     --OpUsefulBits;
   1749     // The interesting part will be shifted in the result
   1750     OpUsefulBits = OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm);
   1751     getUsefulBits(Op, OpUsefulBits, Depth + 1);
   1752     // The interesting part was at zero in the argument
   1753     OpUsefulBits = OpUsefulBits.lshr(OpUsefulBits.getBitWidth() - Imm);
   1754   }
   1755 
   1756   UsefulBits &= OpUsefulBits;
   1757 }
   1758 
   1759 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
   1760                                   unsigned Depth) {
   1761   uint64_t Imm =
   1762       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
   1763   uint64_t MSB =
   1764       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   1765 
   1766   getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
   1767 }
   1768 
   1769 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
   1770                                               unsigned Depth) {
   1771   uint64_t ShiftTypeAndValue =
   1772       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   1773   APInt Mask(UsefulBits);
   1774   Mask.clearAllBits();
   1775   Mask.flipAllBits();
   1776 
   1777   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
   1778     // Shift Left
   1779     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
   1780     Mask = Mask.shl(ShiftAmt);
   1781     getUsefulBits(Op, Mask, Depth + 1);
   1782     Mask = Mask.lshr(ShiftAmt);
   1783   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
   1784     // Shift Right
   1785     // We do not handle AArch64_AM::ASR, because the sign will change the
   1786     // number of useful bits
   1787     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
   1788     Mask = Mask.lshr(ShiftAmt);
   1789     getUsefulBits(Op, Mask, Depth + 1);
   1790     Mask = Mask.shl(ShiftAmt);
   1791   } else
   1792     return;
   1793 
   1794   UsefulBits &= Mask;
   1795 }
   1796 
   1797 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
   1798                                  unsigned Depth) {
   1799   uint64_t Imm =
   1800       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   1801   uint64_t MSB =
   1802       cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
   1803 
   1804   if (Op.getOperand(1) == Orig)
   1805     return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
   1806 
   1807   APInt OpUsefulBits(UsefulBits);
   1808   OpUsefulBits = 1;
   1809 
   1810   if (MSB >= Imm) {
   1811     OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
   1812     --OpUsefulBits;
   1813     UsefulBits &= ~OpUsefulBits;
   1814     getUsefulBits(Op, UsefulBits, Depth + 1);
   1815   } else {
   1816     OpUsefulBits = OpUsefulBits.shl(MSB + 1);
   1817     --OpUsefulBits;
   1818     UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
   1819     getUsefulBits(Op, UsefulBits, Depth + 1);
   1820   }
   1821 }
   1822 
   1823 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
   1824                                 SDValue Orig, unsigned Depth) {
   1825 
   1826   // Users of this node should have already been instruction selected
   1827   // FIXME: Can we turn that into an assert?
   1828   if (!UserNode->isMachineOpcode())
   1829     return;
   1830 
   1831   switch (UserNode->getMachineOpcode()) {
   1832   default:
   1833     return;
   1834   case AArch64::ANDSWri:
   1835   case AArch64::ANDSXri:
   1836   case AArch64::ANDWri:
   1837   case AArch64::ANDXri:
    1838     // We increment Depth only when we call getUsefulBits.
   1839     return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
   1840                                              Depth);
   1841   case AArch64::UBFMWri:
   1842   case AArch64::UBFMXri:
   1843     return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
   1844 
   1845   case AArch64::ORRWrs:
   1846   case AArch64::ORRXrs:
   1847     if (UserNode->getOperand(1) != Orig)
   1848       return;
   1849     return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
   1850                                              Depth);
   1851   case AArch64::BFMWri:
   1852   case AArch64::BFMXri:
   1853     return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
   1854   }
   1855 }
   1856 
   1857 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
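           // Bound the recursion depth to keep the traversal cheap.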
   1858   if (Depth >= 6)
   1859     return;
   1860   // Initialize UsefulBits
   1861   if (!Depth) {
   1862     unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
    1863     // At the beginning, assume every produced bit is useful.
   1864     UsefulBits = APInt(Bitwidth, 0);
   1865     UsefulBits.flipAllBits();
   1866   }
   1867   APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
   1868 
   1869   for (SDNode *Node : Op.getNode()->uses()) {
   1870     // A use cannot produce useful bits
   1871     APInt UsefulBitsForUse = APInt(UsefulBits);
   1872     getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
   1873     UsersUsefulBits |= UsefulBitsForUse;
   1874   }
   1875   // UsefulBits contains the produced bits that are meaningful for the
   1876   // current definition, thus a user cannot make a bit meaningful at
   1877   // this point
   1878   UsefulBits &= UsersUsefulBits;
   1879 }
   1880 
   1881 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
   1882 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
   1883 /// 0, return Op unchanged.
   1884 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
   1885   if (ShlAmount == 0)
   1886     return Op;
   1887 
   1888   EVT VT = Op.getValueType();
   1889   SDLoc dl(Op);
   1890   unsigned BitWidth = VT.getSizeInBits();
   1891   unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
   1892 
   1893   SDNode *ShiftNode;
   1894   if (ShlAmount > 0) {
   1895     // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
   1896     ShiftNode = CurDAG->getMachineNode(
   1897         UBFMOpc, dl, VT, Op,
   1898         CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
   1899         CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
   1900   } else {
   1901     // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
   1902     assert(ShlAmount < 0 && "expected right shift");
   1903     int ShrAmount = -ShlAmount;
   1904     ShiftNode = CurDAG->getMachineNode(
   1905         UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
   1906         CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
   1907   }
   1908 
   1909   return SDValue(ShiftNode, 0);
   1910 }
   1911 
   1912 /// Does this tree qualify as an attempt to move a bitfield into position,
    1913 /// essentially "(and (shl VAL, N), Mask)"?
   1914 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
   1915                                     bool BiggerPattern,
   1916                                     SDValue &Src, int &ShiftAmount,
   1917                                     int &MaskWidth) {
   1918   EVT VT = Op.getValueType();
   1919   unsigned BitWidth = VT.getSizeInBits();
   1920   (void)BitWidth;
   1921   assert(BitWidth == 32 || BitWidth == 64);
   1922 
   1923   APInt KnownZero, KnownOne;
   1924   CurDAG->computeKnownBits(Op, KnownZero, KnownOne);
   1925 
   1926   // Non-zero in the sense that they're not provably zero, which is the key
   1927   // point if we want to use this value
   1928   uint64_t NonZeroBits = (~KnownZero).getZExtValue();
   1929 
   1930   // Discard a constant AND mask if present. It's safe because the node will
   1931   // already have been factored into the computeKnownBits calculation above.
   1932   uint64_t AndImm;
   1933   if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
   1934     assert((~APInt(BitWidth, AndImm) & ~KnownZero) == 0);
   1935     Op = Op.getOperand(0);
   1936   }
   1937 
   1938   // Don't match if the SHL has more than one use, since then we'll end up
   1939   // generating SHL+UBFIZ instead of just keeping SHL+AND.
   1940   if (!BiggerPattern && !Op.hasOneUse())
   1941     return false;
   1942 
   1943   uint64_t ShlImm;
   1944   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
   1945     return false;
   1946   Op = Op.getOperand(0);
   1947 
   1948   if (!isShiftedMask_64(NonZeroBits))
   1949     return false;
   1950 
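           // NonZeroBits is a shifted mask, so the field being positioned starts at
           // bit ShiftAmount and is MaskWidth bits wide.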
   1951   ShiftAmount = countTrailingZeros(NonZeroBits);
   1952   MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
   1953 
   1954   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   1955   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
   1956   // amount.  BiggerPattern is true when this pattern is being matched for BFI,
   1957   // BiggerPattern is false when this pattern is being matched for UBFIZ, in
   1958   // which case it is not profitable to insert an extra shift.
   1959   if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
   1960     return false;
   1961   Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
   1962 
   1963   return true;
   1964 }
   1965 
    1966 // Given an OR operation, check if we have the following pattern:
    1967 // ubfm c, b, imm, imm2 (or something that does the same job, see
    1968 //                       isBitfieldExtractOp)
    1969 // d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
    1970 //                 countTrailingZeros(mask2) == imm2 - imm + 1
    1971 // f = d | c
    1972 // If yes, the given reference arguments will be updated so that one can
    1973 // replace the OR instruction with:
    1974 // f = Opc Opd0, Opd1, LSB, MSB ; where Opc is a BFM, LSB = imm, and MSB = imm2
   1975 static bool isBitfieldInsertOpFromOr(SDNode *N, unsigned &Opc, SDValue &Dst,
   1976                                      SDValue &Src, unsigned &ImmR,
   1977                                      unsigned &ImmS, const APInt &UsefulBits,
   1978                                      SelectionDAG *CurDAG) {
   1979   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
   1980 
   1981   // Set Opc
   1982   EVT VT = N->getValueType(0);
   1983   if (VT == MVT::i32)
   1984     Opc = AArch64::BFMWri;
   1985   else if (VT == MVT::i64)
   1986     Opc = AArch64::BFMXri;
   1987   else
   1988     return false;
   1989 
   1990   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
   1991   // have the expected shape. Try to undo that.
   1992 
   1993   unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
   1994   unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
   1995 
   1996   // OR is commutative, check all combinations of operand order and values of
   1997   // BiggerPattern, i.e.
   1998   //     Opd0, Opd1, BiggerPattern=false
   1999   //     Opd1, Opd0, BiggerPattern=false
   2000   //     Opd0, Opd1, BiggerPattern=true
   2001   //     Opd1, Opd0, BiggerPattern=true
   2002   // Several of these combinations may match, so check with BiggerPattern=false
   2003   // first since that will produce better results by matching more instructions
   2004   // and/or inserting fewer extra instructions.
   2005   for (int I = 0; I < 4; ++I) {
   2006 
   2007     bool BiggerPattern = I / 2;
   2008     SDNode *OrOpd0 = N->getOperand(I % 2).getNode();
   2009     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
   2010     SDNode *OrOpd1 = OrOpd1Val.getNode();
   2011 
   2012     unsigned BFXOpc;
   2013     int DstLSB, Width;
   2014     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
   2015                             NumberOfIgnoredLowBits, BiggerPattern)) {
   2016       // Check that the returned opcode is compatible with the pattern,
   2017       // i.e., same type and zero extended (U and not S)
   2018       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
   2019           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
   2020         continue;
   2021 
   2022       // Compute the width of the bitfield insertion
   2023       DstLSB = 0;
   2024       Width = ImmS - ImmR + 1;
    2025       // FIXME: This constraint is to catch bitfield insertion; we may
    2026       // want to widen the pattern if we want to grab the general bitfield
    2027       // move case.
   2028       if (Width <= 0)
   2029         continue;
   2030 
   2031       // If the mask on the insertee is correct, we have a BFXIL operation. We
   2032       // can share the ImmR and ImmS values from the already-computed UBFM.
   2033     } else if (isBitfieldPositioningOp(CurDAG, SDValue(OrOpd0, 0),
   2034                                        BiggerPattern,
   2035                                        Src, DstLSB, Width)) {
   2036       ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
   2037       ImmS = Width - 1;
   2038     } else
   2039       continue;
   2040 
   2041     // Check the second part of the pattern
   2042     EVT VT = OrOpd1->getValueType(0);
   2043     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
   2044 
    2045     // Compute the known-zero bits of the candidate destination operand.
    2046     // This allows us to catch more general cases than just looking for
    2047     // an AND with an immediate. Indeed, simplify-demanded-bits may have
    2048     // removed the AND instruction after proving it was useless.
   2049     APInt KnownZero, KnownOne;
   2050     CurDAG->computeKnownBits(OrOpd1Val, KnownZero, KnownOne);
   2051 
   2052     // Check if there is enough room for the second operand to appear
   2053     // in the first one
   2054     APInt BitsToBeInserted =
   2055         APInt::getBitsSet(KnownZero.getBitWidth(), DstLSB, DstLSB + Width);
   2056 
   2057     if ((BitsToBeInserted & ~KnownZero) != 0)
   2058       continue;
   2059 
   2060     // Set the first operand
   2061     uint64_t Imm;
   2062     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
   2063         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
   2064       // In that case, we can eliminate the AND
   2065       Dst = OrOpd1->getOperand(0);
   2066     else
   2067       // Maybe the AND has been removed by simplify-demanded-bits
   2068       // or is useful because it discards more bits
   2069       Dst = OrOpd1Val;
   2070 
   2071     // both parts match
   2072     return true;
   2073   }
   2074 
   2075   return false;
   2076 }
   2077 
   2078 SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertOp(SDNode *N) {
   2079   if (N->getOpcode() != ISD::OR)
   2080     return nullptr;
   2081 
   2082   unsigned Opc;
   2083   unsigned LSB, MSB;
   2084   SDValue Opd0, Opd1;
   2085   EVT VT = N->getValueType(0);
   2086   APInt NUsefulBits;
   2087   getUsefulBits(SDValue(N, 0), NUsefulBits);
   2088 
    2089   // If none of the bits are useful, just return UNDEF.
   2090   if (!NUsefulBits)
   2091     return CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, VT);
   2092 
   2093   if (!isBitfieldInsertOpFromOr(N, Opc, Opd0, Opd1, LSB, MSB, NUsefulBits,
   2094                                 CurDAG))
   2095     return nullptr;
   2096 
   2097   SDLoc dl(N);
   2098   SDValue Ops[] = { Opd0,
   2099                     Opd1,
   2100                     CurDAG->getTargetConstant(LSB, dl, VT),
   2101                     CurDAG->getTargetConstant(MSB, dl, VT) };
   2102   return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2103 }
   2104 
   2105 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
   2106 /// equivalent of a left shift by a constant amount followed by an and masking
   2107 /// out a contiguous set of bits.
   2108 SDNode *AArch64DAGToDAGISel::SelectBitfieldInsertInZeroOp(SDNode *N) {
   2109   if (N->getOpcode() != ISD::AND)
   2110     return nullptr;
   2111 
   2112   EVT VT = N->getValueType(0);
   2113   unsigned Opc;
   2114   if (VT == MVT::i32)
   2115     Opc = AArch64::UBFMWri;
   2116   else if (VT == MVT::i64)
   2117     Opc = AArch64::UBFMXri;
   2118   else
   2119     return nullptr;
   2120 
   2121   SDValue Op0;
   2122   int DstLSB, Width;
   2123   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
   2124                                Op0, DstLSB, Width))
   2125     return nullptr;
   2126 
   2127   // ImmR is the rotate right amount.
   2128   unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
   2129   // ImmS is the most significant bit of the source to be moved.
   2130   unsigned ImmS = Width - 1;
   2131 
   2132   SDLoc DL(N);
   2133   SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
   2134                    CurDAG->getTargetConstant(ImmS, DL, VT)};
   2135   return CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2136 }
   2137 
   2138 bool
   2139 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
   2140                                               unsigned RegWidth) {
   2141   APFloat FVal(0.0);
   2142   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
   2143     FVal = CN->getValueAPF();
   2144   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
   2145     // Some otherwise illegal constants are allowed in this case.
   2146     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
   2147         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
   2148       return false;
   2149 
   2150     ConstantPoolSDNode *CN =
   2151         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
   2152     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
   2153   } else
   2154     return false;
   2155 
   2156   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
   2157   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
   2158   // x-register.
   2159   //
   2160   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
   2161   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
   2162   // integers.
   2163   bool IsExact;
   2164 
   2165   // fbits is between 1 and 64 in the worst-case, which means the fmul
   2166   // could have 2^64 as an actual operand. Need 65 bits of precision.
   2167   APSInt IntVal(65, true);
   2168   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
   2169 
   2170   // N.b. isPowerOf2 also checks for > 0.
   2171   if (!IsExact || !IntVal.isPowerOf2()) return false;
   2172   unsigned FBits = IntVal.logBase2();
   2173 
   2174   // Checks above should have guaranteed that we haven't lost information in
   2175   // finding FBits, but it must still be in range.
   2176   if (FBits == 0 || FBits > RegWidth) return false;
   2177 
   2178   FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
   2179   return true;
   2180 }
   2181 
    2182 // Inspects a register string of the form op0:op1:CRn:CRm:op2, gets the
    2183 // fields of the string, obtains the integer values from them, and combines
    2184 // these into a single value to be used in the MRS/MSR instruction.
   2185 static int getIntOperandFromRegisterString(StringRef RegString) {
   2186   SmallVector<StringRef, 5> Fields;
   2187   RegString.split(Fields, ':');
   2188 
   2189   if (Fields.size() == 1)
   2190     return -1;
   2191 
   2192   assert(Fields.size() == 5
   2193             && "Invalid number of fields in read register string");
   2194 
   2195   SmallVector<int, 5> Ops;
   2196   bool AllIntFields = true;
   2197 
   2198   for (StringRef Field : Fields) {
   2199     unsigned IntField;
   2200     AllIntFields &= !Field.getAsInteger(10, IntField);
   2201     Ops.push_back(IntField);
   2202   }
   2203 
   2204   assert(AllIntFields &&
   2205           "Unexpected non-integer value in special register string.");
   2206 
   2207   // Need to combine the integer fields of the string into a single value
    2208   // based on the bit encoding of the MRS/MSR instruction.
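           // The shift amounts give each field its width in the encoding: op0
           // takes 2 bits, op1 3 bits, CRn 4 bits, CRm 4 bits, and op2 3 bits.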
   2209   return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
   2210          (Ops[3] << 3) | (Ops[4]);
   2211 }
   2212 
   2213 // Lower the read_register intrinsic to an MRS instruction node if the special
    2214 // register string argument is either of the form detailed in the ACLE (the
    2215 // form described in getIntOperandFromRegisterString) or is a named register
   2216 // known by the MRS SysReg mapper.
   2217 SDNode *AArch64DAGToDAGISel::SelectReadRegister(SDNode *N) {
   2218   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   2219   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   2220   SDLoc DL(N);
   2221 
   2222   int Reg = getIntOperandFromRegisterString(RegString->getString());
   2223   if (Reg != -1)
   2224     return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
   2225                                   MVT::Other,
   2226                                   CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2227                                   N->getOperand(0));
   2228 
   2229   // Use the sysreg mapper to map the remaining possible strings to the
   2230   // value for the register to be used for the instruction operand.
   2231   AArch64SysReg::MRSMapper mapper;
   2232   bool IsValidSpecialReg;
   2233   Reg = mapper.fromString(RegString->getString(),
   2234                           Subtarget->getFeatureBits(),
   2235                           IsValidSpecialReg);
   2236   if (IsValidSpecialReg)
   2237     return CurDAG->getMachineNode(AArch64::MRS, DL, N->getSimpleValueType(0),
   2238                                   MVT::Other,
   2239                                   CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2240                                   N->getOperand(0));
   2241 
   2242   return nullptr;
   2243 }
   2244 
   2245 // Lower the write_register intrinsic to an MSR instruction node if the special
    2246 // register string argument is either of the form detailed in the ACLE (the
    2247 // form described in getIntOperandFromRegisterString) or is a named register
   2248 // known by the MSR SysReg mapper.
   2249 SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
   2250   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   2251   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   2252   SDLoc DL(N);
   2253 
   2254   int Reg = getIntOperandFromRegisterString(RegString->getString());
   2255   if (Reg != -1)
   2256     return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
   2257                                   CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2258                                   N->getOperand(2), N->getOperand(0));
   2259 
   2260   // Check if the register was one of those allowed as the pstatefield value in
   2261   // the MSR (immediate) instruction. To accept the values allowed in the
   2262   // pstatefield for the MSR (immediate) instruction, we also require that an
    2263   // immediate value has been provided as an argument; we know that this is
   2264   // the case as it has been ensured by semantic checking.
   2265   AArch64PState::PStateMapper PMapper;
   2266   bool IsValidSpecialReg;
   2267   Reg = PMapper.fromString(RegString->getString(),
   2268                            Subtarget->getFeatureBits(),
   2269                            IsValidSpecialReg);
   2270   if (IsValidSpecialReg) {
    2271     assert(isa<ConstantSDNode>(N->getOperand(2)) &&
    2272            "Expected a constant integer expression.");
   2273     uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
   2274     unsigned State;
   2275     if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO) {
   2276       assert(Immed < 2 && "Bad imm");
   2277       State = AArch64::MSRpstateImm1;
   2278     } else {
   2279       assert(Immed < 16 && "Bad imm");
   2280       State = AArch64::MSRpstateImm4;
   2281     }
   2282     return CurDAG->getMachineNode(State, DL, MVT::Other,
   2283                                   CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2284                                   CurDAG->getTargetConstant(Immed, DL, MVT::i16),
   2285                                   N->getOperand(0));
   2286   }
   2287 
   2288   // Use the sysreg mapper to attempt to map the remaining possible strings
   2289   // to the value for the register to be used for the MSR (register)
   2290   // instruction operand.
   2291   AArch64SysReg::MSRMapper Mapper;
   2292   Reg = Mapper.fromString(RegString->getString(),
   2293                           Subtarget->getFeatureBits(),
   2294                           IsValidSpecialReg);
   2295 
   2296   if (IsValidSpecialReg)
   2297     return CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
   2298                                   CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2299                                   N->getOperand(2), N->getOperand(0));
   2300 
   2301   return nullptr;
   2302 }
   2303 
   2304 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   2305   // Dump information about the Node being selected
   2306   DEBUG(errs() << "Selecting: ");
   2307   DEBUG(Node->dump(CurDAG));
   2308   DEBUG(errs() << "\n");
   2309 
    2310   // If we have a machine opcode, the node has already been selected!
   2311   if (Node->isMachineOpcode()) {
   2312     DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
   2313     Node->setNodeId(-1);
   2314     return nullptr;
   2315   }
   2316 
    2317   // A few cases need custom selection.
   2318   SDNode *ResNode = nullptr;
   2319   EVT VT = Node->getValueType(0);
   2320 
   2321   switch (Node->getOpcode()) {
   2322   default:
   2323     break;
   2324 
   2325   case ISD::READ_REGISTER:
   2326     if (SDNode *Res = SelectReadRegister(Node))
   2327       return Res;
   2328     break;
   2329 
   2330   case ISD::WRITE_REGISTER:
   2331     if (SDNode *Res = SelectWriteRegister(Node))
   2332       return Res;
   2333     break;
   2334 
   2335   case ISD::ADD:
   2336     if (SDNode *I = SelectMLAV64LaneV128(Node))
   2337       return I;
   2338     break;
   2339 
   2340   case ISD::LOAD: {
   2341     // Try to select as an indexed load. Fall through to normal processing
   2342     // if we can't.
   2343     bool Done = false;
   2344     SDNode *I = SelectIndexedLoad(Node, Done);
   2345     if (Done)
   2346       return I;
   2347     break;
   2348   }
   2349 
   2350   case ISD::SRL:
   2351   case ISD::AND:
   2352   case ISD::SRA:
   2353     if (SDNode *I = SelectBitfieldExtractOp(Node))
   2354       return I;
   2355     if (SDNode *I = SelectBitfieldInsertInZeroOp(Node))
   2356       return I;
   2357     break;
   2358 
   2359   case ISD::OR:
   2360     if (SDNode *I = SelectBitfieldInsertOp(Node))
   2361       return I;
   2362     break;
   2363 
   2364   case ISD::EXTRACT_VECTOR_ELT: {
   2365     // Extracting lane zero is a special case where we can just use a plain
   2366     // EXTRACT_SUBREG instruction, which will become FMOV. This is easier for
    2367     // the rest of the compiler, especially the register allocator and copy
   2368     // propagation, to reason about, so is preferred when it's possible to
   2369     // use it.
   2370     ConstantSDNode *LaneNode = cast<ConstantSDNode>(Node->getOperand(1));
   2371     // Bail and use the default Select() for non-zero lanes.
   2372     if (LaneNode->getZExtValue() != 0)
   2373       break;
   2374     // If the element type is not the same as the result type, likewise
   2375     // bail and use the default Select(), as there's more to do than just
   2376     // a cross-class COPY. This catches extracts of i8 and i16 elements
   2377     // since they will need an explicit zext.
   2378     if (VT != Node->getOperand(0).getValueType().getVectorElementType())
   2379       break;
   2380     unsigned SubReg;
   2381     switch (Node->getOperand(0)
   2382                 .getValueType()
   2383                 .getVectorElementType()
   2384                 .getSizeInBits()) {
   2385     default:
   2386       llvm_unreachable("Unexpected vector element type!");
   2387     case 64:
   2388       SubReg = AArch64::dsub;
   2389       break;
   2390     case 32:
   2391       SubReg = AArch64::ssub;
   2392       break;
   2393     case 16:
   2394       SubReg = AArch64::hsub;
   2395       break;
   2396     case 8:
   2397       llvm_unreachable("unexpected zext-requiring extract element!");
   2398     }
   2399     SDValue Extract = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(Node), VT,
   2400                                                      Node->getOperand(0));
   2401     DEBUG(dbgs() << "ISEL: Custom selection!\n=> ");
   2402     DEBUG(Extract->dumpr(CurDAG));
   2403     DEBUG(dbgs() << "\n");
   2404     return Extract.getNode();
   2405   }
   2406   case ISD::Constant: {
   2407     // Materialize zero constants as copies from WZR/XZR.  This allows
   2408     // the coalescer to propagate these into other instructions.
   2409     ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
   2410     if (ConstNode->isNullValue()) {
   2411       if (VT == MVT::i32)
   2412         return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
   2413                                       AArch64::WZR, MVT::i32).getNode();
   2414       else if (VT == MVT::i64)
   2415         return CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(Node),
   2416                                       AArch64::XZR, MVT::i64).getNode();
   2417     }
   2418     break;
   2419   }
   2420 
   2421   case ISD::FrameIndex: {
   2422     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
   2423     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
   2424     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
   2425     const TargetLowering *TLI = getTargetLowering();
   2426     SDValue TFI = CurDAG->getTargetFrameIndex(
   2427         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   2428     SDLoc DL(Node);
   2429     SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
   2430                       CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
   2431     return CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
   2432   }
   2433   case ISD::INTRINSIC_W_CHAIN: {
   2434     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
   2435     switch (IntNo) {
   2436     default:
   2437       break;
   2438     case Intrinsic::aarch64_ldaxp:
   2439     case Intrinsic::aarch64_ldxp: {
   2440       unsigned Op =
   2441           IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
   2442       SDValue MemAddr = Node->getOperand(2);
   2443       SDLoc DL(Node);
   2444       SDValue Chain = Node->getOperand(0);
   2445 
   2446       SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
   2447                                           MVT::Other, MemAddr, Chain);
   2448 
   2449       // Transfer memoperands.
   2450       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2451       MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
   2452       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
   2453       return Ld;
   2454     }
   2455     case Intrinsic::aarch64_stlxp:
   2456     case Intrinsic::aarch64_stxp: {
   2457       unsigned Op =
   2458           IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
   2459       SDLoc DL(Node);
   2460       SDValue Chain = Node->getOperand(0);
   2461       SDValue ValLo = Node->getOperand(2);
   2462       SDValue ValHi = Node->getOperand(3);
   2463       SDValue MemAddr = Node->getOperand(4);
   2464 
   2465       // Place arguments in the right order.
   2466       SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
   2467 
   2468       SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
   2469       // Transfer memoperands.
   2470       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2471       MemOp[0] = cast<MemIntrinsicSDNode>(Node)->getMemOperand();
   2472       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   2473 
   2474       return St;
   2475     }
    case Intrinsic::aarch64_neon_ld1x2:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
      break;
    case Intrinsic::aarch64_neon_ld1x3:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
      break;
    case Intrinsic::aarch64_neon_ld1x4:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
      break;
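    // LD2/LD3/LD4 have no .1d arrangement, so for v1i64/v1f64 the cases below
    // fall back to the contiguous multi-register LD1 forms, which load the
    // same bytes (interleaving is a no-op for single-element structures).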
    case Intrinsic::aarch64_neon_ld2:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
      break;
    case Intrinsic::aarch64_neon_ld3:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
      break;
    case Intrinsic::aarch64_neon_ld4:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
      break;
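    // The LDnR (load-and-replicate) forms load one element per structure and
    // duplicate it across every lane of the corresponding result vector.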
    case Intrinsic::aarch64_neon_ld2r:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
      break;
    case Intrinsic::aarch64_neon_ld3r:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
      break;
    case Intrinsic::aarch64_neon_ld4r:
      if (VT == MVT::v8i8)
        return SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
      else if (VT == MVT::v16i8)
        return SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
      break;
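    // Lane loads insert into an existing register tuple, so they are keyed on
    // element size alone; integer and FP vectors of the same geometry share
    // one instruction.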
    case Intrinsic::aarch64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectLoadLane(Node, 2, AArch64::LD2i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16)
        return SelectLoadLane(Node, 2, AArch64::LD2i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectLoadLane(Node, 2, AArch64::LD2i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectLoadLane(Node, 2, AArch64::LD2i64);
      break;
    case Intrinsic::aarch64_neon_ld3lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectLoadLane(Node, 3, AArch64::LD3i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16)
        return SelectLoadLane(Node, 3, AArch64::LD3i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectLoadLane(Node, 3, AArch64::LD3i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectLoadLane(Node, 3, AArch64::LD3i64);
      break;
    case Intrinsic::aarch64_neon_ld4lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectLoadLane(Node, 4, AArch64::LD4i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16)
        return SelectLoadLane(Node, 4, AArch64::LD4i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectLoadLane(Node, 4, AArch64::LD4i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectLoadLane(Node, 4, AArch64::LD4i64);
      break;
    }
  } break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      break;
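    // Table lookups: the trailing flag tells SelectTable whether this is TBX,
    // which takes the accumulator as an extra source because out-of-range
    // indices leave the corresponding result byte unchanged (TBL zeroes it).
    // Illustrative example (assumed intrinsic mangling):
    //   %r = call <8 x i8> @llvm.aarch64.neon.tbl2.v8i8(<16 x i8> %a,
    //                                                   <16 x i8> %b,
    //                                                   <8 x i8> %c)
    //     --> tbl v0.8b, { v1.16b, v2.16b }, v3.8b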
    case Intrinsic::aarch64_neon_tbl2:
      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBLv8i8Two
                                                  : AArch64::TBLv16i8Two,
                         false);
    case Intrinsic::aarch64_neon_tbl3:
      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
                                                  : AArch64::TBLv16i8Three,
                         false);
    case Intrinsic::aarch64_neon_tbl4:
      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
                                                  : AArch64::TBLv16i8Four,
                         false);
    case Intrinsic::aarch64_neon_tbx2:
      return SelectTable(Node, 2, VT == MVT::v8i8 ? AArch64::TBXv8i8Two
                                                  : AArch64::TBXv16i8Two,
                         true);
    case Intrinsic::aarch64_neon_tbx3:
      return SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
                                                  : AArch64::TBXv16i8Three,
                         true);
    case Intrinsic::aarch64_neon_tbx4:
      return SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
                                                  : AArch64::TBXv16i8Four,
                         true);
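    // Try to form a lane-indexed SMULL/UMULL when one multiplicand is a lane
    // duplicated out of a 128-bit vector; otherwise fall through to the
    // generic patterns.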
    case Intrinsic::aarch64_neon_smull:
    case Intrinsic::aarch64_neon_umull:
      if (SDNode *N = SelectMULLV64LaneV128(IntNo, Node))
        return N;
      break;
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
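    // Void intrinsics have no result, so take VT from the first data operand;
    // for the stores below all stored vectors share one type.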
    if (Node->getNumOperands() >= 3)
      VT = Node->getOperand(2)->getValueType(0);
    switch (IntNo) {
    default:
      break;
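    // ST1x2/ST1x3/ST1x4 store two to four registers contiguously, without the
    // interleaving performed by ST2/ST3/ST4 further down.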
    case Intrinsic::aarch64_neon_st1x2: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 2, AArch64::ST1Twov8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 2, AArch64::ST1Twov16b);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectStore(Node, 2, AArch64::ST1Twov4h);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectStore(Node, 2, AArch64::ST1Twov8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 2, AArch64::ST1Twov2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 2, AArch64::ST1Twov4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 2, AArch64::ST1Twov2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 2, AArch64::ST1Twov1d);
      break;
    }
    case Intrinsic::aarch64_neon_st1x3: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 3, AArch64::ST1Threev8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 3, AArch64::ST1Threev16b);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectStore(Node, 3, AArch64::ST1Threev4h);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectStore(Node, 3, AArch64::ST1Threev8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 3, AArch64::ST1Threev2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 3, AArch64::ST1Threev4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 3, AArch64::ST1Threev2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 3, AArch64::ST1Threev1d);
      break;
    }
    case Intrinsic::aarch64_neon_st1x4: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 4, AArch64::ST1Fourv8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 4, AArch64::ST1Fourv16b);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectStore(Node, 4, AArch64::ST1Fourv4h);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectStore(Node, 4, AArch64::ST1Fourv8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 4, AArch64::ST1Fourv2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 4, AArch64::ST1Fourv4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 4, AArch64::ST1Fourv2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
      break;
    }
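    // The interleaving stores. As with the loads, v1i64/v1f64 fall back to
    // multi-register ST1 since STn has no .1d arrangement. Illustrative
    // example (assumed intrinsic mangling):
    //   call void @llvm.aarch64.neon.st2.v16i8.p0i8(<16 x i8> %a,
    //                                               <16 x i8> %b, i8* %p)
    //     --> st2 { v0.16b, v1.16b }, [x0]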
    case Intrinsic::aarch64_neon_st2: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 2, AArch64::ST2Twov8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 2, AArch64::ST2Twov16b);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectStore(Node, 2, AArch64::ST2Twov4h);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectStore(Node, 2, AArch64::ST2Twov8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 2, AArch64::ST2Twov2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 2, AArch64::ST2Twov4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 2, AArch64::ST2Twov2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 2, AArch64::ST1Twov1d);
      break;
    }
    case Intrinsic::aarch64_neon_st3: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 3, AArch64::ST3Threev8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 3, AArch64::ST3Threev16b);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectStore(Node, 3, AArch64::ST3Threev4h);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectStore(Node, 3, AArch64::ST3Threev8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 3, AArch64::ST3Threev2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 3, AArch64::ST3Threev4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 3, AArch64::ST3Threev2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 3, AArch64::ST1Threev1d);
      break;
    }
    case Intrinsic::aarch64_neon_st4: {
      if (VT == MVT::v8i8)
        return SelectStore(Node, 4, AArch64::ST4Fourv8b);
      else if (VT == MVT::v16i8)
        return SelectStore(Node, 4, AArch64::ST4Fourv16b);
      else if (VT == MVT::v4i16 || VT == MVT::v4f16)
        return SelectStore(Node, 4, AArch64::ST4Fourv4h);
      else if (VT == MVT::v8i16 || VT == MVT::v8f16)
        return SelectStore(Node, 4, AArch64::ST4Fourv8h);
      else if (VT == MVT::v2i32 || VT == MVT::v2f32)
        return SelectStore(Node, 4, AArch64::ST4Fourv2s);
      else if (VT == MVT::v4i32 || VT == MVT::v4f32)
        return SelectStore(Node, 4, AArch64::ST4Fourv4s);
      else if (VT == MVT::v2i64 || VT == MVT::v2f64)
        return SelectStore(Node, 4, AArch64::ST4Fourv2d);
      else if (VT == MVT::v1i64 || VT == MVT::v1f64)
        return SelectStore(Node, 4, AArch64::ST1Fourv1d);
      break;
    }
    case Intrinsic::aarch64_neon_st2lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 2, AArch64::ST2i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16)
        return SelectStoreLane(Node, 2, AArch64::ST2i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 2, AArch64::ST2i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 2, AArch64::ST2i64);
      break;
    }
    case Intrinsic::aarch64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 3, AArch64::ST3i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16)
        return SelectStoreLane(Node, 3, AArch64::ST3i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 3, AArch64::ST3i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 3, AArch64::ST3i64);
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8)
        return SelectStoreLane(Node, 4, AArch64::ST4i8);
      else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16)
        return SelectStoreLane(Node, 4, AArch64::ST4i16);
      else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32)
        return SelectStoreLane(Node, 4, AArch64::ST4i32);
      else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64)
        return SelectStoreLane(Node, 4, AArch64::ST4i64);
      break;
    }
    }
    break;
  }
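  // The AArch64ISD::*post nodes below are formed earlier, during
  // target-specific DAG combining, when a structured load or store is
  // followed by a matching increment of its address; each case selects the
  // post-indexed (writeback) form of the instruction.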
  case AArch64ISD::LD2post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
    else if (VT == MVT::v16i8)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
    break;
  }
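  // The post-indexed store nodes produce no vector result, so VT is re-read
  // from operand 1, the first stored value.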
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
    else if (VT == MVT::v16i8)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
    else if (VT == MVT::v4i16 || VT == MVT::v4f16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v8f16)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
    else if (VT == MVT::v2i32 || VT == MVT::v2f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v4f32)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
    else if (VT == MVT::v1i64 || VT == MVT::v1f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v2f64)
      return SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
    else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
             VT == MVT::v8f16)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
    else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
             VT == MVT::v2f32)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
    else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
             VT == MVT::v1f64)
      return SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
    break;
  }
  }

  // No special handling matched; fall back to the TableGen-generated
  // instruction selector.
  ResNode = SelectCode(Node);

  DEBUG(errs() << "=> ");
  if (ResNode == nullptr || ResNode == Node)
    DEBUG(Node->dump(CurDAG));
  else
    DEBUG(ResNode->dump(CurDAG));
  DEBUG(errs() << "\n");

  return ResNode;
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOpt::Level OptLevel) {
  return new AArch64DAGToDAGISel(TM, OptLevel);
}