Home | History | Annotate | Download | only in ARM
      1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the ARM target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "ARM.h"
     15 #include "ARMBaseInstrInfo.h"
     16 #include "ARMTargetMachine.h"
     17 #include "MCTargetDesc/ARMAddressingModes.h"
     18 #include "llvm/ADT/StringSwitch.h"
     19 #include "llvm/CodeGen/MachineFrameInfo.h"
     20 #include "llvm/CodeGen/MachineFunction.h"
     21 #include "llvm/CodeGen/MachineInstrBuilder.h"
     22 #include "llvm/CodeGen/MachineRegisterInfo.h"
     23 #include "llvm/CodeGen/SelectionDAG.h"
     24 #include "llvm/CodeGen/SelectionDAGISel.h"
     25 #include "llvm/IR/CallingConv.h"
     26 #include "llvm/IR/Constants.h"
     27 #include "llvm/IR/DerivedTypes.h"
     28 #include "llvm/IR/Function.h"
     29 #include "llvm/IR/Intrinsics.h"
     30 #include "llvm/IR/LLVMContext.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Compiler.h"
     33 #include "llvm/Support/Debug.h"
     34 #include "llvm/Support/ErrorHandling.h"
     35 #include "llvm/Target/TargetLowering.h"
     36 #include "llvm/Target/TargetOptions.h"
     37 
     38 using namespace llvm;
     39 
     40 #define DEBUG_TYPE "arm-isel"
     41 
        // Hidden command-line switch, off by default. When it is set, both
        // SelectImmShifterOperand and SelectRegShifterOperand return false right
        // away, so neither selector forms a shifter operand.
     42 static cl::opt<bool>
     43 DisableShifterOp("disable-shifter-op", cl::Hidden,
     44   cl::desc("Disable isel of shifter-op"),
     45   cl::init(false));
     46 
        // Hidden command-line switch, on by default. When it is cleared,
        // hasNoVMLxHazardUse returns true without looking at the node's use.
     47 static cl::opt<bool>
     48 CheckVMLxHazard("check-vmlx-hazard", cl::Hidden,
     49   cl::desc("Check fp vmla / vmls hazard at isel time"),
     50   cl::init(true));
     51 
     52 //===--------------------------------------------------------------------===//
     53 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
     54 /// instructions for SelectionDAG operations.
     55 ///
     56 namespace {
     57 
        /// Classification returned by SelectAddrMode2Worker. The
        /// SelectAddrMode2Base / SelectAddrMode2ShOp wrappers compare against it to
        /// decide whether their form matched.
     58 enum AddrMode2Type {
     59   AM2_BASE, // Simple AM2 (+-imm12)
     60   AM2_SHOP  // Shifter-op AM2
     61 };
     62 
     63 class ARMDAGToDAGISel : public SelectionDAGISel {
     64   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
     65   /// make the right decision when generating code for different targets.
          /// The constructor does not set it; runOnMachineFunction assigns it
          /// before running the base-class selector.
     66   const ARMSubtarget *Subtarget;
     67 
     68 public:
          /// Forwards the target machine and optimisation level to
          /// SelectionDAGISel; no other state is initialised here.
     69   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
     70       : SelectionDAGISel(tm, OptLevel) {}
     71 
          /// Caches the function's ARMSubtarget, then delegates to the base class.
          /// The base-class result is discarded and true is always returned.
     72   bool runOnMachineFunction(MachineFunction &MF) override {
     73     // Reset the subtarget each time through.
     74     Subtarget = &MF.getSubtarget<ARMSubtarget>();
     75     SelectionDAGISel::runOnMachineFunction(MF);
     76     return true;
     77   }
     78 
     79   const char *getPassName() const override {
     80     return "ARM Instruction Selection";
     81   }
     82 
     83   void PreprocessISelDAG() override;
     84 
     85   /// getI32Imm - Return a target constant of type i32 with the specified
     86   /// value.
     87   inline SDValue getI32Imm(unsigned Imm, SDLoc dl) {
     88     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
     89   }
     90 
     91   SDNode *Select(SDNode *N) override;
     92 
     93 
     94   bool hasNoVMLxHazardUse(SDNode *N) const;
     95   bool isShifterOpProfitable(const SDValue &Shift,
     96                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
     97   bool SelectRegShifterOperand(SDValue N, SDValue &A,
     98                                SDValue &B, SDValue &C,
     99                                bool CheckProfitability = true);
    100   bool SelectImmShifterOperand(SDValue N, SDValue &A,
    101                                SDValue &B, bool CheckProfitability = true);
          // The two wrappers below call the selectors above with
          // CheckProfitability forced to false.
    102   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
    103                                     SDValue &B, SDValue &C) {
    104     // Don't apply the profitability check
    105     return SelectRegShifterOperand(N, A, B, C, false);
    106   }
    107   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
    108                                     SDValue &B) {
    109     // Don't apply the profitability check
    110     return SelectImmShifterOperand(N, A, B, false);
    111   }
    112 
    113   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
    114   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
    115 
          // SelectAddrMode2Worker does the matching and reports which form it
          // produced; the Base/ShOp wrappers succeed only for their own form.
    116   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
    117                                       SDValue &Offset, SDValue &Opc);
    118   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
    119                            SDValue &Opc) {
    120     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
    121   }
    122 
    123   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
    124                            SDValue &Opc) {
    125     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
    126   }
    127 
          // Accepts whichever form the worker produced; its classification is
          // discarded.
    128   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
    129                        SDValue &Opc) {
    130     SelectAddrMode2Worker(N, Base, Offset, Opc);
    131 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    132     // This always matches one way or another.
    133     return true;
    134   }
    135 
          // N is converted with cast<ConstantSDNode>, so callers are expected to
          // pass a constant node. Pred receives the constant's zero-extended
          // value as an i32 target constant; Reg receives the CPSR register.
    136   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    137     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    138     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    139     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    140     return true;
    141   }
    142 
    143   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
    144                              SDValue &Offset, SDValue &Opc);
    145   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
    146                              SDValue &Offset, SDValue &Opc);
    147   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
    148                              SDValue &Offset, SDValue &Opc);
    149   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
    150   bool SelectAddrMode3(SDValue N, SDValue &Base,
    151                        SDValue &Offset, SDValue &Opc);
    152   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
    153                              SDValue &Offset, SDValue &Opc);
    154   bool SelectAddrMode5(SDValue N, SDValue &Base,
    155                        SDValue &Offset);
    156   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
    157   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
    158 
    159   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
    160 
    161   // Thumb Addressing Modes:
    162   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
    163   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
    164                                 SDValue &OffImm);
    165   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
    166                                  SDValue &OffImm);
    167   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
    168                                  SDValue &OffImm);
    169   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
    170                                  SDValue &OffImm);
    171   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
    172 
    173   // Thumb 2 Addressing Modes:
    174   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
    175   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
    176                             SDValue &OffImm);
    177   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
    178                                  SDValue &OffImm);
    179   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
    180                              SDValue &OffReg, SDValue &ShImm);
    181   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
    182 
          // Immediate predicates. Each reports whether the ARM_AM encoder it
          // calls returns something other than -1 for Imm (or for ~Imm in the
          // *_not variants).
    183   inline bool is_so_imm(unsigned Imm) const {
    184     return ARM_AM::getSOImmVal(Imm) != -1;
    185   }
    186 
    187   inline bool is_so_imm_not(unsigned Imm) const {
    188     return ARM_AM::getSOImmVal(~Imm) != -1;
    189   }
    190 
    191   inline bool is_t2_so_imm(unsigned Imm) const {
    192     return ARM_AM::getT2SOImmVal(Imm) != -1;
    193   }
    194 
    195   inline bool is_t2_so_imm_not(unsigned Imm) const {
    196     return ARM_AM::getT2SOImmVal(~Imm) != -1;
    197   }
    198 
    199   // Include the pieces autogenerated from the target description.
    200 #include "ARMGenDAGISel.inc"
    201 
    202 private:
    203   /// SelectARMIndexedLoad - Indexed (pre/post inc/dec) load matching code for
    204   /// ARM.
    205   SDNode *SelectARMIndexedLoad(SDNode *N);
    206   SDNode *SelectT2IndexedLoad(SDNode *N);
    207 
    208   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
    209   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
    210   /// loads of D registers and even subregs and odd subregs of Q registers.
    211   /// For NumVecs <= 2, QOpcodes1 is not used.
    212   SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
    213                     const uint16_t *DOpcodes,
    214                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
    215 
    216   /// SelectVST - Select NEON store intrinsics.  NumVecs should
    217   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
    218   /// stores of D registers and even subregs and odd subregs of Q registers.
    219   /// For NumVecs <= 2, QOpcodes1 is not used.
    220   SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
    221                     const uint16_t *DOpcodes,
    222                     const uint16_t *QOpcodes0, const uint16_t *QOpcodes1);
    223 
    224   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
    225   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
    226   /// load/store of D registers and Q registers.
    227   SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad,
    228                           bool isUpdating, unsigned NumVecs,
    229                           const uint16_t *DOpcodes, const uint16_t *QOpcodes);
    230 
    231   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
    232   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
    233   /// for loading D registers.  (Q registers are not supported.)
    234   SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
    235                        const uint16_t *Opcodes);
    236 
    237   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
    238   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
    239   /// generated to force the table registers to be consecutive.
    240   SDNode *SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
    241 
    242   /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM.
    243   SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
    244 
    245   // Select special operations if node forms integer ABS pattern
    246   SDNode *SelectABSOp(SDNode *N);
    247 
    248   SDNode *SelectReadRegister(SDNode *N);
    249   SDNode *SelectWriteRegister(SDNode *N);
    250 
    251   SDNode *SelectInlineAsm(SDNode *N);
    252 
    253   SDNode *SelectConcatVector(SDNode *N);
    254 
    255   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    256   /// inline asm expressions.
    257   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
    258                                     std::vector<SDValue> &OutOps) override;
    259 
    260   // Form pairs of consecutive R, S, D, or Q registers.
    261   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
    262   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
    263   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
    264   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
    265 
    266   // Form sequences of 4 consecutive S, D, or Q registers.
    267   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
    268   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
    269   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
    270 
    271   // Get the alignment operand for a NEON VLD or VST instruction.
    272   SDValue GetVLDSTAlign(SDValue Align, SDLoc dl, unsigned NumVecs,
    273                         bool is64BitVector);
    274 
    275   /// Returns the number of instructions required to materialize the given
    276   /// constant in a register, or 3 if a literal pool load is needed.
    277   unsigned ConstantMaterializationCost(unsigned Val) const;
    278 
    279   /// Checks if N is a multiplication by a constant where we can extract out a
    280   /// power of two from the constant so that it can be used in a shift, but only
    281   /// if it simplifies the materialization of the constant. Returns true if it
    282   /// is, and assigns to PowerOfTwo the power of two that should be extracted
    283   /// out and to NewMulConst the new constant to be multiplied by.
    284   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
    285                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
    286 
    287   /// Replace N with M in CurDAG, in a way that also ensures that M gets
    288   /// selected when N would have been selected.
    289   void replaceDAGValue(const SDValue &N, SDValue M);
    290 };
    291 }
    292 
    293 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
    294 /// operand. If so Imm will receive the 32-bit value.
    295 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
    296   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    297     Imm = cast<ConstantSDNode>(N)->getZExtValue();
    298     return true;
    299   }
    300   return false;
    301 }
    302 
    303 // isInt32Immediate - This method tests to see if a constant operand.
    304 // If so Imm will receive the 32 bit value.
    305 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
    306   return isInt32Immediate(N.getNode(), Imm);
    307 }
    308 
    309 // isOpcWithIntImmediate - This method tests to see if the node is a specific
    310 // opcode and that it has a immediate integer right operand.
    311 // If so Imm will receive the 32 bit value.
    312 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
    313   return N->getOpcode() == Opc &&
    314          isInt32Immediate(N->getOperand(1).getNode(), Imm);
    315 }
    316 
    317 /// \brief Check whether a particular node is a constant value representable as
    318 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
    319 ///
    320 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
    321 static bool isScaledConstantInRange(SDValue Node, int Scale,
    322                                     int RangeMin, int RangeMax,
    323                                     int &ScaledConstant) {
    324   assert(Scale > 0 && "Invalid scale!");
    325 
    326   // Check that this is a constant.
    327   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
    328   if (!C)
    329     return false;
    330 
    331   ScaledConstant = (int) C->getZExtValue();
    332   if ((ScaledConstant % Scale) != 0)
    333     return false;
    334 
    335   ScaledConstant /= Scale;
    336   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
    337 }
    338 
        /// Pre-selection DAG rewrite. Does nothing unless the subtarget has V6T2
        /// ops. Otherwise every ISD::ADD node in the DAG is inspected, and an
        /// operand of the form (and (srl X, c1), c2) whose mask has one or two
        /// trailing zero bits is rewritten into (shl (and (srl X, c1+tz), c2>>tz), tz),
        /// as described in the comment inside the loop.
    339 void ARMDAGToDAGISel::PreprocessISelDAG() {
    340   if (!Subtarget->hasV6T2Ops())
    341     return;
    342 
          // NOTE(review): named isThumb2 but tests isThumb(); presumably fine
          // because hasV6T2Ops() was required above - confirm.
    343   bool isThumb2 = Subtarget->isThumb();
    344   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
    345        E = CurDAG->allnodes_end(); I != E; ) {
    346     SDNode *N = &*I++; // Step the iterator past N first to avoid invalidation issues.
    347 
    348     if (N->getOpcode() != ISD::ADD)
    349       continue;
    350 
    351     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    352     // leading zeros, followed by consecutive set bits, followed by 1 or 2
    353     // trailing zeros, e.g. 1020.
    354     // Transform the expression to
    355     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    356     // of trailing zeros of c2. The left shift would be folded as an shifter
    357     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    358     // node (UBFX).
    359 
            // Normalise so that N1 is the AND operand. And_imm stays 0 when
            // neither operand is an AND with an i32 constant (or when that
            // constant is itself 0), and the node is then skipped.
    360     SDValue N0 = N->getOperand(0);
    361     SDValue N1 = N->getOperand(1);
    362     unsigned And_imm = 0;
    363     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
    364       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
    365         std::swap(N0, N1);
    366     }
    367     if (!And_imm)
    368       continue;
    369 
    370     // Check if the AND mask is an immediate of the form: 000.....1111111100
    371     unsigned TZ = countTrailingZeros(And_imm);
    372     if (TZ != 1 && TZ != 2)
    373       // Be conservative here. Shifter operands aren't always free. e.g. On
    374       // Swift, left shifter operand of 1 / 2 for free but others are not.
    375       // e.g.
    376       //  ubfx   r3, r1, #16, #8
    377       //  ldr.w  r3, [r0, r3, lsl #2]
    378       // vs.
    379       //  mov.w  r9, #1020
    380       //  and.w  r2, r9, r1, lsr #14
    381       //  ldr    r2, [r0, r2]
    382       continue;
            // With the trailing zeros dropped, the mask must be a contiguous run
            // of low set bits, i.e. mask & (mask + 1) == 0.
    383     And_imm >>= TZ;
    384     if (And_imm & (And_imm + 1))
    385       continue;
    386 
    387     // Look for (and (srl X, c1), c2).
            // Shift amounts of 2 or less are rejected as well.
    388     SDValue Srl = N1.getOperand(0);
    389     unsigned Srl_imm = 0;
    390     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
    391         (Srl_imm <= 2))
    392       continue;
    393 
    394     // Make sure first operand is not a shifter operand which would prevent
    395     // folding of the left shift.
    396     SDValue CPTmp0;
    397     SDValue CPTmp1;
    398     SDValue CPTmp2;
    399     if (isThumb2) {
    400       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
    401         continue;
    402     } else {
    403       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
    404           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
    405         continue;
    406     }
    407 
    408     // Now make the transformation.
            // The srl amount grows by TZ, the mask was already shifted right by
            // TZ above, and a final shl by TZ is added; N's operands are then
            // updated in place.
    409     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
    410                           Srl.getOperand(0),
    411                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
    412                                               MVT::i32));
    413     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
    414                          Srl,
    415                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    416     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
    417                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    418     CurDAG->UpdateNodeOperands(N, N0, N1);
    419   }
    420 }
    421 
    422 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
    423 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
    424 /// least on current ARM implementations) which should be avoidded.
    425 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
    426   if (OptLevel == CodeGenOpt::None)
    427     return true;
    428 
    429   if (!CheckVMLxHazard)
    430     return true;
    431 
    432   if (!Subtarget->isCortexA7() && !Subtarget->isCortexA8() &&
    433       !Subtarget->isCortexA9() && !Subtarget->isSwift())
    434     return true;
    435 
    436   if (!N->hasOneUse())
    437     return false;
    438 
    439   SDNode *Use = *N->use_begin();
    440   if (Use->getOpcode() == ISD::CopyToReg)
    441     return true;
    442   if (Use->isMachineOpcode()) {
    443     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
    444         CurDAG->getSubtarget().getInstrInfo());
    445 
    446     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    447     if (MCID.mayStore())
    448       return true;
    449     unsigned Opcode = MCID.getOpcode();
    450     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
    451       return true;
    452     // vmlx feeding into another vmlx. We actually want to unfold
    453     // the use later in the MLxExpansion pass. e.g.
    454     // vmla
    455     // vmla (stall 8 cycles)
    456     //
    457     // vmul (5 cycles)
    458     // vadd (5 cycles)
    459     // vmla
    460     // This adds up to about 18 - 19 cycles.
    461     //
    462     // vmla
    463     // vmul (stall 4 cycles)
    464     // vadd adds up to about 14 cycles.
    465     return TII->isFpMLxInstruction(Opcode);
    466   }
    467 
    468   return false;
    469 }
    470 
    471 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
    472                                             ARM_AM::ShiftOpc ShOpcVal,
    473                                             unsigned ShAmt) {
    474   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    475     return true;
    476   if (Shift.hasOneUse())
    477     return true;
    478   // R << 2 is free.
    479   return ShOpcVal == ARM_AM::lsl &&
    480          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
    481 }
    482 
    483 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
    484   if (Subtarget->isThumb()) {
    485     if (Val <= 255) return 1;                               // MOV
    486     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    487     if (~Val <= 255) return 2;                              // MOV + MVN
    488     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
    489   } else {
    490     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    491     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    492     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    493     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
    494   }
    495   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
    496   return 3; // Literal pool load
    497 }
    498 
    499 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
    500                                              unsigned MaxShift,
    501                                              unsigned &PowerOfTwo,
    502                                              SDValue &NewMulConst) const {
    503   assert(N.getOpcode() == ISD::MUL);
    504   assert(MaxShift > 0);
    505 
    506   // If the multiply is used in more than one place then changing the constant
    507   // will make other uses incorrect, so don't.
    508   if (!N.hasOneUse()) return false;
    509   // Check if the multiply is by a constant
    510   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
    511   if (!MulConst) return false;
    512   // If the constant is used in more than one place then modifying it will mean
    513   // we need to materialize two constants instead of one, which is a bad idea.
    514   if (!MulConst->hasOneUse()) return false;
    515   unsigned MulConstVal = MulConst->getZExtValue();
    516   if (MulConstVal == 0) return false;
    517 
    518   // Find the largest power of 2 that MulConstVal is a multiple of
    519   PowerOfTwo = MaxShift;
    520   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    521     --PowerOfTwo;
    522     if (PowerOfTwo == 0) return false;
    523   }
    524 
    525   // Only optimise if the new cost is better
    526   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
    527   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
    528   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
    529   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
    530   return NewCost < OldCost;
    531 }
    532 
    533 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
    534   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
    535   CurDAG->ReplaceAllUsesWith(N, M);
    536 }
    537 
        /// Tries to match N as a register shifted by an immediate amount.
        ///
        /// On success BaseReg holds the value being shifted and Opc holds an i32
        /// target constant built with ARM_AM::getSORegOpc from the shift kind and
        /// amount. Returns false when the disable-shifter-op option is set, when N
        /// is not a shift opcode, or when the shift amount is not a constant; in
        /// that last case BaseReg has already been overwritten.
        ///
        /// CheckProfitability is accepted for interface symmetry with
        /// SelectRegShifterOperand but is not consulted anywhere in this body.
    538 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
    539                                               SDValue &BaseReg,
    540                                               SDValue &Opc,
    541                                               bool CheckProfitability) {
    542   if (DisableShifterOp)
    543     return false;
    544 
    545   // If N is a multiply-by-constant and it's profitable to extract a shift and
    546   // use it in a shifted operand do so.
    547   if (N.getOpcode() == ISD::MUL) {
    548     unsigned PowerOfTwo = 0;
    549     SDValue NewMulConst;
    550     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
              // Build a multiply by the reduced constant and run Select() on it
              // right away; BaseReg is the result of that selection. The old
              // constant is then replaced by the new one across the DAG, and
              // the extracted power of two becomes an LSL amount.
    551       BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32,
    552                                                N.getOperand(0), NewMulConst)
    553                                    .getNode()),
    554                         0);
    555       replaceDAGValue(N.getOperand(1), NewMulConst);
    556       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
    557                                                           PowerOfTwo),
    558                                       SDLoc(N), MVT::i32);
    559       return true;
    560     }
    561   }
    562 
    563   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
    564 
    565   // Don't match base register only case. That is matched to a separate
    566   // lower complexity pattern with explicit register operand.
    567   if (ShOpcVal == ARM_AM::no_shift) return false;
    568 
    569   BaseReg = N.getOperand(0);
    570   unsigned ShImmVal = 0;
    571   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
    572   if (!RHS) return false;
          // Only the low five bits of the shift amount are kept.
    573   ShImmVal = RHS->getZExtValue() & 31;
    574   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
    575                                   SDLoc(N), MVT::i32);
    576   return true;
    577 }
    578 
    579 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
    580                                               SDValue &BaseReg,
    581                                               SDValue &ShReg,
    582                                               SDValue &Opc,
    583                                               bool CheckProfitability) {
    584   if (DisableShifterOp)
    585     return false;
    586 
    587   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
    588 
    589   // Don't match base register only case. That is matched to a separate
    590   // lower complexity pattern with explicit register operand.
    591   if (ShOpcVal == ARM_AM::no_shift) return false;
    592 
    593   BaseReg = N.getOperand(0);
    594   unsigned ShImmVal = 0;
    595   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
    596   if (RHS) return false;
    597 
    598   ShReg = N.getOperand(1);
    599   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    600     return false;
    601   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
    602                                   SDLoc(N), MVT::i32);
    603   return true;
    604 }
    605 
    606 
    607 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
    608                                           SDValue &Base,
    609                                           SDValue &OffImm) {
    610   // Match simple R + imm12 operands.
    611 
    612   // Base only.
    613   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
    614       !CurDAG->isBaseWithConstantOffset(N)) {
    615     if (N.getOpcode() == ISD::FrameIndex) {
    616       // Match frame index.
    617       int FI = cast<FrameIndexSDNode>(N)->getIndex();
    618       Base = CurDAG->getTargetFrameIndex(
    619           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    620       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    621       return true;
    622     }
    623 
    624     if (N.getOpcode() == ARMISD::Wrapper &&
    625         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
    626       Base = N.getOperand(0);
    627     } else
    628       Base = N;
    629     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    630     return true;
    631   }
    632 
    633   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    634     int RHSC = (int)RHS->getSExtValue();
    635     if (N.getOpcode() == ISD::SUB)
    636       RHSC = -RHSC;
    637 
    638     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
    639       Base   = N.getOperand(0);
    640       if (Base.getOpcode() == ISD::FrameIndex) {
    641         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    642         Base = CurDAG->getTargetFrameIndex(
    643             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    644       }
    645       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    646       return true;
    647     }
    648   }
    649 
    650   // Base only.
    651   Base = N;
    652   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    653   return true;
    654 }
    655 
    656 
    657 
    658 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
    659                                       SDValue &Opc) {
    660   if (N.getOpcode() == ISD::MUL &&
    661       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    662     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    663       // X * [3,5,9] -> X + X * [2,4,8] etc.
    664       int RHSC = (int)RHS->getZExtValue();
    665       if (RHSC & 1) {
    666         RHSC = RHSC & ~1;
    667         ARM_AM::AddrOpc AddSub = ARM_AM::add;
    668         if (RHSC < 0) {
    669           AddSub = ARM_AM::sub;
    670           RHSC = - RHSC;
    671         }
    672         if (isPowerOf2_32(RHSC)) {
    673           unsigned ShAmt = Log2_32(RHSC);
    674           Base = Offset = N.getOperand(0);
    675           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
    676                                                             ARM_AM::lsl),
    677                                           SDLoc(N), MVT::i32);
    678           return true;
    679         }
    680       }
    681     }
    682   }
    683 
    684   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
    685       // ISD::OR that is equivalent to an ISD::ADD.
    686       !CurDAG->isBaseWithConstantOffset(N))
    687     return false;
    688 
    689   // Leave simple R +/- imm12 operands for LDRi12
    690   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    691     int RHSC;
    692     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
    693                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
    694       return false;
    695   }
    696 
    697   // Otherwise this is R +/- [possibly shifted] R.
    698   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
    699   ARM_AM::ShiftOpc ShOpcVal =
    700     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
    701   unsigned ShAmt = 0;
    702 
    703   Base   = N.getOperand(0);
    704   Offset = N.getOperand(1);
    705 
    706   if (ShOpcVal != ARM_AM::no_shift) {
    707     // Check to see if the RHS of the shift is a constant, if not, we can't fold
    708     // it.
    709     if (ConstantSDNode *Sh =
    710            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
    711       ShAmt = Sh->getZExtValue();
    712       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
    713         Offset = N.getOperand(1).getOperand(0);
    714       else {
    715         ShAmt = 0;
    716         ShOpcVal = ARM_AM::no_shift;
    717       }
    718     } else {
    719       ShOpcVal = ARM_AM::no_shift;
    720     }
    721   }
    722 
    723   // Try matching (R shl C) + (R).
    724   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
    725       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
    726         N.getOperand(0).hasOneUse())) {
    727     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    728     if (ShOpcVal != ARM_AM::no_shift) {
    729       // Check to see if the RHS of the shift is a constant, if not, we can't
    730       // fold it.
    731       if (ConstantSDNode *Sh =
    732           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
    733         ShAmt = Sh->getZExtValue();
    734         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
    735           Offset = N.getOperand(0).getOperand(0);
    736           Base = N.getOperand(1);
    737         } else {
    738           ShAmt = 0;
    739           ShOpcVal = ARM_AM::no_shift;
    740         }
    741       } else {
    742         ShOpcVal = ARM_AM::no_shift;
    743       }
    744     }
    745   }
    746 
    747   // If Offset is a multiply-by-constant and it's profitable to extract a shift
    748   // and use it in a shifted operand do so.
    749   if (Offset.getOpcode() == ISD::MUL) {
    750     unsigned PowerOfTwo = 0;
    751     SDValue NewMulConst;
    752     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
    753       replaceDAGValue(Offset.getOperand(1), NewMulConst);
    754       ShAmt = PowerOfTwo;
    755       ShOpcVal = ARM_AM::lsl;
    756     }
    757   }
    758 
    759   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
    760                                   SDLoc(N), MVT::i32);
    761   return true;
    762 }
    763 
    764 
    765 //-----
    766 
    767 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
    768                                                      SDValue &Base,
    769                                                      SDValue &Offset,
    770                                                      SDValue &Opc) {
    771   if (N.getOpcode() == ISD::MUL &&
    772       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    773     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    774       // X * [3,5,9] -> X + X * [2,4,8] etc.
    775       int RHSC = (int)RHS->getZExtValue();
    776       if (RHSC & 1) {
    777         RHSC = RHSC & ~1;
    778         ARM_AM::AddrOpc AddSub = ARM_AM::add;
    779         if (RHSC < 0) {
    780           AddSub = ARM_AM::sub;
    781           RHSC = - RHSC;
    782         }
    783         if (isPowerOf2_32(RHSC)) {
    784           unsigned ShAmt = Log2_32(RHSC);
    785           Base = Offset = N.getOperand(0);
    786           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
    787                                                             ARM_AM::lsl),
    788                                           SDLoc(N), MVT::i32);
    789           return AM2_SHOP;
    790         }
    791       }
    792     }
    793   }
    794 
    795   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
    796       // ISD::OR that is equivalent to an ADD.
    797       !CurDAG->isBaseWithConstantOffset(N)) {
    798     Base = N;
    799     if (N.getOpcode() == ISD::FrameIndex) {
    800       int FI = cast<FrameIndexSDNode>(N)->getIndex();
    801       Base = CurDAG->getTargetFrameIndex(
    802           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    803     } else if (N.getOpcode() == ARMISD::Wrapper &&
    804                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
    805       Base = N.getOperand(0);
    806     }
    807     Offset = CurDAG->getRegister(0, MVT::i32);
    808     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
    809                                                       ARM_AM::no_shift),
    810                                     SDLoc(N), MVT::i32);
    811     return AM2_BASE;
    812   }
    813 
    814   // Match simple R +/- imm12 operands.
    815   if (N.getOpcode() != ISD::SUB) {
    816     int RHSC;
    817     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
    818                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
    819       Base = N.getOperand(0);
    820       if (Base.getOpcode() == ISD::FrameIndex) {
    821         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    822         Base = CurDAG->getTargetFrameIndex(
    823             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    824       }
    825       Offset = CurDAG->getRegister(0, MVT::i32);
    826 
    827       ARM_AM::AddrOpc AddSub = ARM_AM::add;
    828       if (RHSC < 0) {
    829         AddSub = ARM_AM::sub;
    830         RHSC = - RHSC;
    831       }
    832       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
    833                                                         ARM_AM::no_shift),
    834                                       SDLoc(N), MVT::i32);
    835       return AM2_BASE;
    836     }
    837   }
    838 
    839   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    840     // Compute R +/- (R << N) and reuse it.
    841     Base = N;
    842     Offset = CurDAG->getRegister(0, MVT::i32);
    843     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
    844                                                       ARM_AM::no_shift),
    845                                     SDLoc(N), MVT::i32);
    846     return AM2_BASE;
    847   }
    848 
    849   // Otherwise this is R +/- [possibly shifted] R.
    850   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
    851   ARM_AM::ShiftOpc ShOpcVal =
    852     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
    853   unsigned ShAmt = 0;
    854 
    855   Base   = N.getOperand(0);
    856   Offset = N.getOperand(1);
    857 
    858   if (ShOpcVal != ARM_AM::no_shift) {
    859     // Check to see if the RHS of the shift is a constant, if not, we can't fold
    860     // it.
    861     if (ConstantSDNode *Sh =
    862            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
    863       ShAmt = Sh->getZExtValue();
    864       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
    865         Offset = N.getOperand(1).getOperand(0);
    866       else {
    867         ShAmt = 0;
    868         ShOpcVal = ARM_AM::no_shift;
    869       }
    870     } else {
    871       ShOpcVal = ARM_AM::no_shift;
    872     }
    873   }
    874 
    875   // Try matching (R shl C) + (R).
    876   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
    877       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
    878         N.getOperand(0).hasOneUse())) {
    879     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    880     if (ShOpcVal != ARM_AM::no_shift) {
    881       // Check to see if the RHS of the shift is a constant, if not, we can't
    882       // fold it.
    883       if (ConstantSDNode *Sh =
    884           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
    885         ShAmt = Sh->getZExtValue();
    886         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
    887           Offset = N.getOperand(0).getOperand(0);
    888           Base = N.getOperand(1);
    889         } else {
    890           ShAmt = 0;
    891           ShOpcVal = ARM_AM::no_shift;
    892         }
    893       } else {
    894         ShOpcVal = ARM_AM::no_shift;
    895       }
    896     }
    897   }
    898 
    899   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
    900                                   SDLoc(N), MVT::i32);
    901   return AM2_SHOP;
    902 }
    903 
    904 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
    905                                             SDValue &Offset, SDValue &Opc) {
    906   unsigned Opcode = Op->getOpcode();
    907   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    908     ? cast<LoadSDNode>(Op)->getAddressingMode()
    909     : cast<StoreSDNode>(Op)->getAddressingMode();
    910   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    911     ? ARM_AM::add : ARM_AM::sub;
    912   int Val;
    913   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    914     return false;
    915 
    916   Offset = N;
    917   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
    918   unsigned ShAmt = 0;
    919   if (ShOpcVal != ARM_AM::no_shift) {
    920     // Check to see if the RHS of the shift is a constant, if not, we can't fold
    921     // it.
    922     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    923       ShAmt = Sh->getZExtValue();
    924       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
    925         Offset = N.getOperand(0);
    926       else {
    927         ShAmt = 0;
    928         ShOpcVal = ARM_AM::no_shift;
    929       }
    930     } else {
    931       ShOpcVal = ARM_AM::no_shift;
    932     }
    933   }
    934 
    935   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
    936                                   SDLoc(N), MVT::i32);
    937   return true;
    938 }
    939 
    940 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
    941                                             SDValue &Offset, SDValue &Opc) {
    942   unsigned Opcode = Op->getOpcode();
    943   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    944     ? cast<LoadSDNode>(Op)->getAddressingMode()
    945     : cast<StoreSDNode>(Op)->getAddressingMode();
    946   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    947     ? ARM_AM::add : ARM_AM::sub;
    948   int Val;
    949   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    950     if (AddSub == ARM_AM::sub) Val *= -1;
    951     Offset = CurDAG->getRegister(0, MVT::i32);
    952     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    953     return true;
    954   }
    955 
    956   return false;
    957 }
    958 
    959 
    960 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
    961                                             SDValue &Offset, SDValue &Opc) {
    962   unsigned Opcode = Op->getOpcode();
    963   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    964     ? cast<LoadSDNode>(Op)->getAddressingMode()
    965     : cast<StoreSDNode>(Op)->getAddressingMode();
    966   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    967     ? ARM_AM::add : ARM_AM::sub;
    968   int Val;
    969   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    970     Offset = CurDAG->getRegister(0, MVT::i32);
    971     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
    972                                                       ARM_AM::no_shift),
    973                                     SDLoc(Op), MVT::i32);
    974     return true;
    975   }
    976 
    977   return false;
    978 }
    979 
    980 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
    981   Base = N;
    982   return true;
    983 }
    984 
    985 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
    986                                       SDValue &Base, SDValue &Offset,
    987                                       SDValue &Opc) {
    988   if (N.getOpcode() == ISD::SUB) {
    989     // X - C  is canonicalize to X + -C, no need to handle it here.
    990     Base = N.getOperand(0);
    991     Offset = N.getOperand(1);
    992     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
    993                                     MVT::i32);
    994     return true;
    995   }
    996 
    997   if (!CurDAG->isBaseWithConstantOffset(N)) {
    998     Base = N;
    999     if (N.getOpcode() == ISD::FrameIndex) {
   1000       int FI = cast<FrameIndexSDNode>(N)->getIndex();
   1001       Base = CurDAG->getTargetFrameIndex(
   1002           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1003     }
   1004     Offset = CurDAG->getRegister(0, MVT::i32);
   1005     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
   1006                                     MVT::i32);
   1007     return true;
   1008   }
   1009 
   1010   // If the RHS is +/- imm8, fold into addr mode.
   1011   int RHSC;
   1012   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
   1013                               -256 + 1, 256, RHSC)) { // 8 bits.
   1014     Base = N.getOperand(0);
   1015     if (Base.getOpcode() == ISD::FrameIndex) {
   1016       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1017       Base = CurDAG->getTargetFrameIndex(
   1018           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1019     }
   1020     Offset = CurDAG->getRegister(0, MVT::i32);
   1021 
   1022     ARM_AM::AddrOpc AddSub = ARM_AM::add;
   1023     if (RHSC < 0) {
   1024       AddSub = ARM_AM::sub;
   1025       RHSC = -RHSC;
   1026     }
   1027     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
   1028                                     MVT::i32);
   1029     return true;
   1030   }
   1031 
   1032   Base = N.getOperand(0);
   1033   Offset = N.getOperand(1);
   1034   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
   1035                                   MVT::i32);
   1036   return true;
   1037 }
   1038 
   1039 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
   1040                                             SDValue &Offset, SDValue &Opc) {
   1041   unsigned Opcode = Op->getOpcode();
   1042   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
   1043     ? cast<LoadSDNode>(Op)->getAddressingMode()
   1044     : cast<StoreSDNode>(Op)->getAddressingMode();
   1045   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
   1046     ? ARM_AM::add : ARM_AM::sub;
   1047   int Val;
   1048   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
   1049     Offset = CurDAG->getRegister(0, MVT::i32);
   1050     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
   1051                                     MVT::i32);
   1052     return true;
   1053   }
   1054 
   1055   Offset = N;
   1056   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
   1057                                   MVT::i32);
   1058   return true;
   1059 }
   1060 
   1061 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
   1062                                       SDValue &Base, SDValue &Offset) {
   1063   if (!CurDAG->isBaseWithConstantOffset(N)) {
   1064     Base = N;
   1065     if (N.getOpcode() == ISD::FrameIndex) {
   1066       int FI = cast<FrameIndexSDNode>(N)->getIndex();
   1067       Base = CurDAG->getTargetFrameIndex(
   1068           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1069     } else if (N.getOpcode() == ARMISD::Wrapper &&
   1070                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
   1071       Base = N.getOperand(0);
   1072     }
   1073     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
   1074                                        SDLoc(N), MVT::i32);
   1075     return true;
   1076   }
   1077 
   1078   // If the RHS is +/- imm8, fold into addr mode.
   1079   int RHSC;
   1080   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
   1081                               -256 + 1, 256, RHSC)) {
   1082     Base = N.getOperand(0);
   1083     if (Base.getOpcode() == ISD::FrameIndex) {
   1084       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1085       Base = CurDAG->getTargetFrameIndex(
   1086           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1087     }
   1088 
   1089     ARM_AM::AddrOpc AddSub = ARM_AM::add;
   1090     if (RHSC < 0) {
   1091       AddSub = ARM_AM::sub;
   1092       RHSC = -RHSC;
   1093     }
   1094     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
   1095                                        SDLoc(N), MVT::i32);
   1096     return true;
   1097   }
   1098 
   1099   Base = N;
   1100   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
   1101                                      SDLoc(N), MVT::i32);
   1102   return true;
   1103 }
   1104 
   1105 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
   1106                                       SDValue &Align) {
   1107   Addr = N;
   1108 
   1109   unsigned Alignment = 0;
   1110 
   1111   MemSDNode *MemN = cast<MemSDNode>(Parent);
   1112 
   1113   if (isa<LSBaseSDNode>(MemN) ||
   1114       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
   1115         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
   1116        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
   1117     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
   1118     // The maximum alignment is equal to the memory size being referenced.
   1119     unsigned MMOAlign = MemN->getAlignment();
   1120     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
   1121     if (MMOAlign >= MemSize && MemSize > 1)
   1122       Alignment = MemSize;
   1123   } else {
   1124     // All other uses of addrmode6 are for intrinsics.  For now just record
   1125     // the raw alignment value; it will be refined later based on the legal
   1126     // alignment operands for the intrinsic.
   1127     Alignment = MemN->getAlignment();
   1128   }
   1129 
   1130   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
   1131   return true;
   1132 }
   1133 
   1134 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
   1135                                             SDValue &Offset) {
   1136   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
   1137   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
   1138   if (AM != ISD::POST_INC)
   1139     return false;
   1140   Offset = N;
   1141   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
   1142     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
   1143       Offset = CurDAG->getRegister(0, MVT::i32);
   1144   }
   1145   return true;
   1146 }
   1147 
   1148 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
   1149                                        SDValue &Offset, SDValue &Label) {
   1150   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
   1151     Offset = N.getOperand(0);
   1152     SDValue N1 = N.getOperand(1);
   1153     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
   1154                                       SDLoc(N), MVT::i32);
   1155     return true;
   1156   }
   1157 
   1158   return false;
   1159 }
   1160 
   1161 
   1162 //===----------------------------------------------------------------------===//
   1163 //                         Thumb Addressing Modes
   1164 //===----------------------------------------------------------------------===//
   1165 
   1166 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
   1167                                             SDValue &Base, SDValue &Offset){
   1168   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
   1169     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
   1170     if (!NC || !NC->isNullValue())
   1171       return false;
   1172 
   1173     Base = Offset = N;
   1174     return true;
   1175   }
   1176 
   1177   Base = N.getOperand(0);
   1178   Offset = N.getOperand(1);
   1179   return true;
   1180 }
   1181 
   1182 bool
   1183 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
   1184                                           SDValue &Base, SDValue &OffImm) {
   1185   if (!CurDAG->isBaseWithConstantOffset(N)) {
   1186     if (N.getOpcode() == ISD::ADD) {
   1187       return false; // We want to select register offset instead
   1188     } else if (N.getOpcode() == ARMISD::Wrapper &&
   1189                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
   1190       Base = N.getOperand(0);
   1191     } else {
   1192       Base = N;
   1193     }
   1194 
   1195     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1196     return true;
   1197   }
   1198 
   1199   // If the RHS is + imm5 * scale, fold into addr mode.
   1200   int RHSC;
   1201   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
   1202     Base = N.getOperand(0);
   1203     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1204     return true;
   1205   }
   1206 
   1207   // Offset is too large, so use register offset instead.
   1208   return false;
   1209 }
   1210 
   1211 bool
   1212 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
   1213                                            SDValue &OffImm) {
   1214   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
   1215 }
   1216 
   1217 bool
   1218 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
   1219                                            SDValue &OffImm) {
   1220   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
   1221 }
   1222 
   1223 bool
   1224 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
   1225                                            SDValue &OffImm) {
   1226   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
   1227 }
   1228 
   1229 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
   1230                                             SDValue &Base, SDValue &OffImm) {
   1231   if (N.getOpcode() == ISD::FrameIndex) {
   1232     int FI = cast<FrameIndexSDNode>(N)->getIndex();
   1233     // Only multiples of 4 are allowed for the offset, so the frame object
   1234     // alignment must be at least 4.
   1235     MachineFrameInfo *MFI = MF->getFrameInfo();
   1236     if (MFI->getObjectAlignment(FI) < 4)
   1237       MFI->setObjectAlignment(FI, 4);
   1238     Base = CurDAG->getTargetFrameIndex(
   1239         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1240     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1241     return true;
   1242   }
   1243 
   1244   if (!CurDAG->isBaseWithConstantOffset(N))
   1245     return false;
   1246 
   1247   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
   1248   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
   1249       (LHSR && LHSR->getReg() == ARM::SP)) {
   1250     // If the RHS is + imm8 * scale, fold into addr mode.
   1251     int RHSC;
   1252     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
   1253       Base = N.getOperand(0);
   1254       if (Base.getOpcode() == ISD::FrameIndex) {
   1255         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1256         // For LHS+RHS to result in an offset that's a multiple of 4 the object
   1257         // indexed by the LHS must be 4-byte aligned.
   1258         MachineFrameInfo *MFI = MF->getFrameInfo();
   1259         if (MFI->getObjectAlignment(FI) < 4)
   1260           MFI->setObjectAlignment(FI, 4);
   1261         Base = CurDAG->getTargetFrameIndex(
   1262             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1263       }
   1264       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1265       return true;
   1266     }
   1267   }
   1268 
   1269   return false;
   1270 }
   1271 
   1272 
   1273 //===----------------------------------------------------------------------===//
   1274 //                        Thumb 2 Addressing Modes
   1275 //===----------------------------------------------------------------------===//
   1276 
   1277 
   1278 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
   1279                                             SDValue &Base, SDValue &OffImm) {
   1280   // Match simple R + imm12 operands.
   1281 
   1282   // Base only.
   1283   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
   1284       !CurDAG->isBaseWithConstantOffset(N)) {
   1285     if (N.getOpcode() == ISD::FrameIndex) {
   1286       // Match frame index.
   1287       int FI = cast<FrameIndexSDNode>(N)->getIndex();
   1288       Base = CurDAG->getTargetFrameIndex(
   1289           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1290       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1291       return true;
   1292     }
   1293 
   1294     if (N.getOpcode() == ARMISD::Wrapper &&
   1295         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
   1296       Base = N.getOperand(0);
   1297       if (Base.getOpcode() == ISD::TargetConstantPool)
   1298         return false;  // We want to select t2LDRpci instead.
   1299     } else
   1300       Base = N;
   1301     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1302     return true;
   1303   }
   1304 
   1305   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
   1306     if (SelectT2AddrModeImm8(N, Base, OffImm))
   1307       // Let t2LDRi8 handle (R - imm8).
   1308       return false;
   1309 
   1310     int RHSC = (int)RHS->getZExtValue();
   1311     if (N.getOpcode() == ISD::SUB)
   1312       RHSC = -RHSC;
   1313 
   1314     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
   1315       Base   = N.getOperand(0);
   1316       if (Base.getOpcode() == ISD::FrameIndex) {
   1317         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1318         Base = CurDAG->getTargetFrameIndex(
   1319             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1320       }
   1321       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1322       return true;
   1323     }
   1324   }
   1325 
   1326   // Base only.
   1327   Base = N;
   1328   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1329   return true;
   1330 }
   1331 
   1332 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
   1333                                            SDValue &Base, SDValue &OffImm) {
   1334   // Match simple R - imm8 operands.
   1335   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
   1336       !CurDAG->isBaseWithConstantOffset(N))
   1337     return false;
   1338 
   1339   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
   1340     int RHSC = (int)RHS->getSExtValue();
   1341     if (N.getOpcode() == ISD::SUB)
   1342       RHSC = -RHSC;
   1343 
   1344     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
   1345       Base = N.getOperand(0);
   1346       if (Base.getOpcode() == ISD::FrameIndex) {
   1347         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1348         Base = CurDAG->getTargetFrameIndex(
   1349             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1350       }
   1351       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1352       return true;
   1353     }
   1354   }
   1355 
   1356   return false;
   1357 }
   1358 
   1359 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
   1360                                                  SDValue &OffImm){
   1361   unsigned Opcode = Op->getOpcode();
   1362   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
   1363     ? cast<LoadSDNode>(Op)->getAddressingMode()
   1364     : cast<StoreSDNode>(Op)->getAddressingMode();
   1365   int RHSC;
   1366   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
   1367     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
   1368       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
   1369       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
   1370     return true;
   1371   }
   1372 
   1373   return false;
   1374 }
   1375 
   1376 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
   1377                                             SDValue &Base,
   1378                                             SDValue &OffReg, SDValue &ShImm) {
   1379   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
   1380   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
   1381     return false;
   1382 
   1383   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
   1384   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
   1385     int RHSC = (int)RHS->getZExtValue();
   1386     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
   1387       return false;
   1388     else if (RHSC < 0 && RHSC >= -255) // 8 bits
   1389       return false;
   1390   }
   1391 
   1392   // Look for (R + R) or (R + (R << [1,2,3])).
   1393   unsigned ShAmt = 0;
   1394   Base   = N.getOperand(0);
   1395   OffReg = N.getOperand(1);
   1396 
   1397   // Swap if it is ((R << c) + R).
   1398   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
   1399   if (ShOpcVal != ARM_AM::lsl) {
   1400     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
   1401     if (ShOpcVal == ARM_AM::lsl)
   1402       std::swap(Base, OffReg);
   1403   }
   1404 
   1405   if (ShOpcVal == ARM_AM::lsl) {
   1406     // Check to see if the RHS of the shift is a constant, if not, we can't fold
   1407     // it.
   1408     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
   1409       ShAmt = Sh->getZExtValue();
   1410       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
   1411         OffReg = OffReg.getOperand(0);
   1412       else {
   1413         ShAmt = 0;
   1414       }
   1415     }
   1416   }
   1417 
   1418   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
   1419   // and use it in a shifted operand do so.
   1420   if (OffReg.getOpcode() == ISD::MUL) {
   1421     unsigned PowerOfTwo = 0;
   1422     SDValue NewMulConst;
   1423     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
   1424       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
   1425       ShAmt = PowerOfTwo;
   1426     }
   1427   }
   1428 
   1429   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
   1430 
   1431   return true;
   1432 }
   1433 
   1434 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
   1435                                                 SDValue &OffImm) {
   1436   // This *must* succeed since it's used for the irreplaceable ldrex and strex
   1437   // instructions.
   1438   Base = N;
   1439   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1440 
   1441   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
   1442     return true;
   1443 
   1444   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
   1445   if (!RHS)
   1446     return true;
   1447 
   1448   uint32_t RHSC = (int)RHS->getZExtValue();
   1449   if (RHSC > 1020 || RHSC % 4 != 0)
   1450     return true;
   1451 
   1452   Base = N.getOperand(0);
   1453   if (Base.getOpcode() == ISD::FrameIndex) {
   1454     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1455     Base = CurDAG->getTargetFrameIndex(
   1456         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1457   }
   1458 
   1459   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
   1460   return true;
   1461 }
   1462 
   1463 //===--------------------------------------------------------------------===//
   1464 
   1465 /// getAL - Returns a ARMCC::AL immediate node.
   1466 static inline SDValue getAL(SelectionDAG *CurDAG, SDLoc dl) {
   1467   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
   1468 }
   1469 
   1470 SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) {
   1471   LoadSDNode *LD = cast<LoadSDNode>(N);
   1472   ISD::MemIndexedMode AM = LD->getAddressingMode();
   1473   if (AM == ISD::UNINDEXED)
   1474     return nullptr;
   1475 
   1476   EVT LoadedVT = LD->getMemoryVT();
   1477   SDValue Offset, AMOpc;
   1478   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
   1479   unsigned Opcode = 0;
   1480   bool Match = false;
   1481   if (LoadedVT == MVT::i32 && isPre &&
   1482       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
   1483     Opcode = ARM::LDR_PRE_IMM;
   1484     Match = true;
   1485   } else if (LoadedVT == MVT::i32 && !isPre &&
   1486       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
   1487     Opcode = ARM::LDR_POST_IMM;
   1488     Match = true;
   1489   } else if (LoadedVT == MVT::i32 &&
   1490       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
   1491     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
   1492     Match = true;
   1493 
   1494   } else if (LoadedVT == MVT::i16 &&
   1495              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
   1496     Match = true;
   1497     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
   1498       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
   1499       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
   1500   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
   1501     if (LD->getExtensionType() == ISD::SEXTLOAD) {
   1502       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
   1503         Match = true;
   1504         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
   1505       }
   1506     } else {
   1507       if (isPre &&
   1508           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
   1509         Match = true;
   1510         Opcode = ARM::LDRB_PRE_IMM;
   1511       } else if (!isPre &&
   1512                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
   1513         Match = true;
   1514         Opcode = ARM::LDRB_POST_IMM;
   1515       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
   1516         Match = true;
   1517         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
   1518       }
   1519     }
   1520   }
   1521 
   1522   if (Match) {
   1523     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
   1524       SDValue Chain = LD->getChain();
   1525       SDValue Base = LD->getBasePtr();
   1526       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
   1527                        CurDAG->getRegister(0, MVT::i32), Chain };
   1528       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
   1529                                     MVT::i32, MVT::Other, Ops);
   1530     } else {
   1531       SDValue Chain = LD->getChain();
   1532       SDValue Base = LD->getBasePtr();
   1533       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
   1534                        CurDAG->getRegister(0, MVT::i32), Chain };
   1535       return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
   1536                                     MVT::i32, MVT::Other, Ops);
   1537     }
   1538   }
   1539 
   1540   return nullptr;
   1541 }
   1542 
   1543 SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
   1544   LoadSDNode *LD = cast<LoadSDNode>(N);
   1545   ISD::MemIndexedMode AM = LD->getAddressingMode();
   1546   if (AM == ISD::UNINDEXED)
   1547     return nullptr;
   1548 
   1549   EVT LoadedVT = LD->getMemoryVT();
   1550   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
   1551   SDValue Offset;
   1552   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
   1553   unsigned Opcode = 0;
   1554   bool Match = false;
   1555   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
   1556     switch (LoadedVT.getSimpleVT().SimpleTy) {
   1557     case MVT::i32:
   1558       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
   1559       break;
   1560     case MVT::i16:
   1561       if (isSExtLd)
   1562         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
   1563       else
   1564         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
   1565       break;
   1566     case MVT::i8:
   1567     case MVT::i1:
   1568       if (isSExtLd)
   1569         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
   1570       else
   1571         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
   1572       break;
   1573     default:
   1574       return nullptr;
   1575     }
   1576     Match = true;
   1577   }
   1578 
   1579   if (Match) {
   1580     SDValue Chain = LD->getChain();
   1581     SDValue Base = LD->getBasePtr();
   1582     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
   1583                      CurDAG->getRegister(0, MVT::i32), Chain };
   1584     return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
   1585                                   MVT::Other, Ops);
   1586   }
   1587 
   1588   return nullptr;
   1589 }
   1590 
   1591 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
   1592 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
   1593   SDLoc dl(V0.getNode());
   1594   SDValue RegClass =
   1595     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
   1596   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
   1597   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
   1598   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1599   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1600 }
   1601 
   1602 /// \brief Form a D register from a pair of S registers.
   1603 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   1604   SDLoc dl(V0.getNode());
   1605   SDValue RegClass =
   1606     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
   1607   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
   1608   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
   1609   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1610   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1611 }
   1612 
   1613 /// \brief Form a quad register from a pair of D registers.
   1614 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   1615   SDLoc dl(V0.getNode());
   1616   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
   1617                                                MVT::i32);
   1618   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
   1619   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
   1620   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1621   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1622 }
   1623 
   1624 /// \brief Form 4 consecutive D registers from a pair of Q registers.
   1625 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   1626   SDLoc dl(V0.getNode());
   1627   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
   1628                                                MVT::i32);
   1629   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
   1630   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
   1631   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1632   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1633 }
   1634 
   1635 /// \brief Form 4 consecutive S registers.
   1636 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
   1637                                    SDValue V2, SDValue V3) {
   1638   SDLoc dl(V0.getNode());
   1639   SDValue RegClass =
   1640     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
   1641   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
   1642   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
   1643   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
   1644   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
   1645   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
   1646                                     V2, SubReg2, V3, SubReg3 };
   1647   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1648 }
   1649 
   1650 /// \brief Form 4 consecutive D registers.
   1651 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
   1652                                    SDValue V2, SDValue V3) {
   1653   SDLoc dl(V0.getNode());
   1654   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
   1655                                                MVT::i32);
   1656   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
   1657   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
   1658   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
   1659   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
   1660   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
   1661                                     V2, SubReg2, V3, SubReg3 };
   1662   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1663 }
   1664 
   1665 /// \brief Form 4 consecutive Q registers.
   1666 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
   1667                                    SDValue V2, SDValue V3) {
   1668   SDLoc dl(V0.getNode());
   1669   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
   1670                                                MVT::i32);
   1671   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
   1672   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
   1673   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
   1674   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
   1675   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
   1676                                     V2, SubReg2, V3, SubReg3 };
   1677   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1678 }
   1679 
   1680 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
   1681 /// of a NEON VLD or VST instruction.  The supported values depend on the
   1682 /// number of registers being loaded.
   1683 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, SDLoc dl,
   1684                                        unsigned NumVecs, bool is64BitVector) {
   1685   unsigned NumRegs = NumVecs;
   1686   if (!is64BitVector && NumVecs < 3)
   1687     NumRegs *= 2;
   1688 
   1689   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
   1690   if (Alignment >= 32 && NumRegs == 4)
   1691     Alignment = 32;
   1692   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
   1693     Alignment = 16;
   1694   else if (Alignment >= 8)
   1695     Alignment = 8;
   1696   else
   1697     Alignment = 0;
   1698 
   1699   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
   1700 }
   1701 
   1702 static bool isVLDfixed(unsigned Opc)
   1703 {
   1704   switch (Opc) {
   1705   default: return false;
   1706   case ARM::VLD1d8wb_fixed : return true;
   1707   case ARM::VLD1d16wb_fixed : return true;
   1708   case ARM::VLD1d64Qwb_fixed : return true;
   1709   case ARM::VLD1d32wb_fixed : return true;
   1710   case ARM::VLD1d64wb_fixed : return true;
   1711   case ARM::VLD1d64TPseudoWB_fixed : return true;
   1712   case ARM::VLD1d64QPseudoWB_fixed : return true;
   1713   case ARM::VLD1q8wb_fixed : return true;
   1714   case ARM::VLD1q16wb_fixed : return true;
   1715   case ARM::VLD1q32wb_fixed : return true;
   1716   case ARM::VLD1q64wb_fixed : return true;
   1717   case ARM::VLD2d8wb_fixed : return true;
   1718   case ARM::VLD2d16wb_fixed : return true;
   1719   case ARM::VLD2d32wb_fixed : return true;
   1720   case ARM::VLD2q8PseudoWB_fixed : return true;
   1721   case ARM::VLD2q16PseudoWB_fixed : return true;
   1722   case ARM::VLD2q32PseudoWB_fixed : return true;
   1723   case ARM::VLD2DUPd8wb_fixed : return true;
   1724   case ARM::VLD2DUPd16wb_fixed : return true;
   1725   case ARM::VLD2DUPd32wb_fixed : return true;
   1726   }
   1727 }
   1728 
   1729 static bool isVSTfixed(unsigned Opc)
   1730 {
   1731   switch (Opc) {
   1732   default: return false;
   1733   case ARM::VST1d8wb_fixed : return true;
   1734   case ARM::VST1d16wb_fixed : return true;
   1735   case ARM::VST1d32wb_fixed : return true;
   1736   case ARM::VST1d64wb_fixed : return true;
   1737   case ARM::VST1q8wb_fixed : return true;
   1738   case ARM::VST1q16wb_fixed : return true;
   1739   case ARM::VST1q32wb_fixed : return true;
   1740   case ARM::VST1q64wb_fixed : return true;
   1741   case ARM::VST1d64TPseudoWB_fixed : return true;
   1742   case ARM::VST1d64QPseudoWB_fixed : return true;
   1743   case ARM::VST2d8wb_fixed : return true;
   1744   case ARM::VST2d16wb_fixed : return true;
   1745   case ARM::VST2d32wb_fixed : return true;
   1746   case ARM::VST2q8PseudoWB_fixed : return true;
   1747   case ARM::VST2q16PseudoWB_fixed : return true;
   1748   case ARM::VST2q32PseudoWB_fixed : return true;
   1749   }
   1750 }
   1751 
   1752 // Get the register stride update opcode of a VLD/VST instruction that
   1753 // is otherwise equivalent to the given fixed stride updating instruction.
   1754 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
   1755   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
   1756     && "Incorrect fixed stride updating instruction.");
   1757   switch (Opc) {
   1758   default: break;
   1759   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
   1760   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
   1761   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
   1762   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
   1763   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
   1764   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
   1765   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
   1766   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
   1767   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
   1768   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
   1769   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
   1770   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
   1771 
   1772   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
   1773   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
   1774   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
   1775   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
   1776   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
   1777   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
   1778   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
   1779   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
   1780   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
   1781   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
   1782 
   1783   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
   1784   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
   1785   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
   1786   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
   1787   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
   1788   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
   1789 
   1790   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
   1791   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
   1792   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
   1793   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
   1794   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
   1795   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
   1796 
   1797   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
   1798   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
   1799   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
   1800   }
   1801   return Opc; // If not one we handle, return it unchanged.
   1802 }
   1803 
   1804 SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
   1805                                    const uint16_t *DOpcodes,
   1806                                    const uint16_t *QOpcodes0,
   1807                                    const uint16_t *QOpcodes1) {
   1808   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
   1809   SDLoc dl(N);
   1810 
   1811   SDValue MemAddr, Align;
   1812   unsigned AddrOpIdx = isUpdating ? 1 : 2;
   1813   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
   1814     return nullptr;
   1815 
   1816   SDValue Chain = N->getOperand(0);
   1817   EVT VT = N->getValueType(0);
   1818   bool is64BitVector = VT.is64BitVector();
   1819   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
   1820 
   1821   unsigned OpcodeIndex;
   1822   switch (VT.getSimpleVT().SimpleTy) {
   1823   default: llvm_unreachable("unhandled vld type");
   1824     // Double-register operations:
   1825   case MVT::v8i8:  OpcodeIndex = 0; break;
   1826   case MVT::v4i16: OpcodeIndex = 1; break;
   1827   case MVT::v2f32:
   1828   case MVT::v2i32: OpcodeIndex = 2; break;
   1829   case MVT::v1i64: OpcodeIndex = 3; break;
   1830     // Quad-register operations:
   1831   case MVT::v16i8: OpcodeIndex = 0; break;
   1832   case MVT::v8i16: OpcodeIndex = 1; break;
   1833   case MVT::v4f32:
   1834   case MVT::v4i32: OpcodeIndex = 2; break;
   1835   case MVT::v2f64:
   1836   case MVT::v2i64: OpcodeIndex = 3;
   1837     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
   1838     break;
   1839   }
   1840 
   1841   EVT ResTy;
   1842   if (NumVecs == 1)
   1843     ResTy = VT;
   1844   else {
   1845     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
   1846     if (!is64BitVector)
   1847       ResTyElts *= 2;
   1848     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
   1849   }
   1850   std::vector<EVT> ResTys;
   1851   ResTys.push_back(ResTy);
   1852   if (isUpdating)
   1853     ResTys.push_back(MVT::i32);
   1854   ResTys.push_back(MVT::Other);
   1855 
   1856   SDValue Pred = getAL(CurDAG, dl);
   1857   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   1858   SDNode *VLd;
   1859   SmallVector<SDValue, 7> Ops;
   1860 
   1861   // Double registers and VLD1/VLD2 quad registers are directly supported.
   1862   if (is64BitVector || NumVecs <= 2) {
   1863     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
   1864                     QOpcodes0[OpcodeIndex]);
   1865     Ops.push_back(MemAddr);
   1866     Ops.push_back(Align);
   1867     if (isUpdating) {
   1868       SDValue Inc = N->getOperand(AddrOpIdx + 1);
   1869       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
   1870       // case entirely when the rest are updated to that form, too.
   1871       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
   1872         Opc = getVLDSTRegisterUpdateOpcode(Opc);
   1873       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
   1874       // check for that explicitly too. Horribly hacky, but temporary.
   1875       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
   1876           !isa<ConstantSDNode>(Inc.getNode()))
   1877         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
   1878     }
   1879     Ops.push_back(Pred);
   1880     Ops.push_back(Reg0);
   1881     Ops.push_back(Chain);
   1882     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1883 
   1884   } else {
   1885     // Otherwise, quad registers are loaded with two separate instructions,
   1886     // where one loads the even registers and the other loads the odd registers.
   1887     EVT AddrTy = MemAddr.getValueType();
   1888 
   1889     // Load the even subregs.  This is always an updating load, so that it
   1890     // provides the address to the second load for the odd subregs.
   1891     SDValue ImplDef =
   1892       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
   1893     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
   1894     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
   1895                                           ResTy, AddrTy, MVT::Other, OpsA);
   1896     Chain = SDValue(VLdA, 2);
   1897 
   1898     // Load the odd subregs.
   1899     Ops.push_back(SDValue(VLdA, 1));
   1900     Ops.push_back(Align);
   1901     if (isUpdating) {
   1902       SDValue Inc = N->getOperand(AddrOpIdx + 1);
   1903       assert(isa<ConstantSDNode>(Inc.getNode()) &&
   1904              "only constant post-increment update allowed for VLD3/4");
   1905       (void)Inc;
   1906       Ops.push_back(Reg0);
   1907     }
   1908     Ops.push_back(SDValue(VLdA, 0));
   1909     Ops.push_back(Pred);
   1910     Ops.push_back(Reg0);
   1911     Ops.push_back(Chain);
   1912     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
   1913   }
   1914 
   1915   // Transfer memoperands.
   1916   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1917   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1918   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
   1919 
   1920   if (NumVecs == 1)
   1921     return VLd;
   1922 
   1923   // Extract out the subregisters.
   1924   SDValue SuperReg = SDValue(VLd, 0);
   1925   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
   1926          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
   1927   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
   1928   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
   1929     ReplaceUses(SDValue(N, Vec),
   1930                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
   1931   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
   1932   if (isUpdating)
   1933     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
   1934   return nullptr;
   1935 }
   1936 
   1937 SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   1938                                    const uint16_t *DOpcodes,
   1939                                    const uint16_t *QOpcodes0,
   1940                                    const uint16_t *QOpcodes1) {
   1941   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
   1942   SDLoc dl(N);
   1943 
   1944   SDValue MemAddr, Align;
   1945   unsigned AddrOpIdx = isUpdating ? 1 : 2;
   1946   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
   1947   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
   1948     return nullptr;
   1949 
   1950   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1951   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1952 
   1953   SDValue Chain = N->getOperand(0);
   1954   EVT VT = N->getOperand(Vec0Idx).getValueType();
   1955   bool is64BitVector = VT.is64BitVector();
   1956   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
   1957 
   1958   unsigned OpcodeIndex;
   1959   switch (VT.getSimpleVT().SimpleTy) {
   1960   default: llvm_unreachable("unhandled vst type");
   1961     // Double-register operations:
   1962   case MVT::v8i8:  OpcodeIndex = 0; break;
   1963   case MVT::v4i16: OpcodeIndex = 1; break;
   1964   case MVT::v2f32:
   1965   case MVT::v2i32: OpcodeIndex = 2; break;
   1966   case MVT::v1i64: OpcodeIndex = 3; break;
   1967     // Quad-register operations:
   1968   case MVT::v16i8: OpcodeIndex = 0; break;
   1969   case MVT::v8i16: OpcodeIndex = 1; break;
   1970   case MVT::v4f32:
   1971   case MVT::v4i32: OpcodeIndex = 2; break;
   1972   case MVT::v2f64:
   1973   case MVT::v2i64: OpcodeIndex = 3;
   1974     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
   1975     break;
   1976   }
   1977 
   1978   std::vector<EVT> ResTys;
   1979   if (isUpdating)
   1980     ResTys.push_back(MVT::i32);
   1981   ResTys.push_back(MVT::Other);
   1982 
   1983   SDValue Pred = getAL(CurDAG, dl);
   1984   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   1985   SmallVector<SDValue, 7> Ops;
   1986 
   1987   // Double registers and VST1/VST2 quad registers are directly supported.
   1988   if (is64BitVector || NumVecs <= 2) {
   1989     SDValue SrcReg;
   1990     if (NumVecs == 1) {
   1991       SrcReg = N->getOperand(Vec0Idx);
   1992     } else if (is64BitVector) {
   1993       // Form a REG_SEQUENCE to force register allocation.
   1994       SDValue V0 = N->getOperand(Vec0Idx + 0);
   1995       SDValue V1 = N->getOperand(Vec0Idx + 1);
   1996       if (NumVecs == 2)
   1997         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
   1998       else {
   1999         SDValue V2 = N->getOperand(Vec0Idx + 2);
   2000         // If it's a vst3, form a quad D-register and leave the last part as
   2001         // an undef.
   2002         SDValue V3 = (NumVecs == 3)
   2003           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
   2004           : N->getOperand(Vec0Idx + 3);
   2005         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
   2006       }
   2007     } else {
   2008       // Form a QQ register.
   2009       SDValue Q0 = N->getOperand(Vec0Idx);
   2010       SDValue Q1 = N->getOperand(Vec0Idx + 1);
   2011       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
   2012     }
   2013 
   2014     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
   2015                     QOpcodes0[OpcodeIndex]);
   2016     Ops.push_back(MemAddr);
   2017     Ops.push_back(Align);
   2018     if (isUpdating) {
   2019       SDValue Inc = N->getOperand(AddrOpIdx + 1);
   2020       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
   2021       // case entirely when the rest are updated to that form, too.
   2022       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
   2023         Opc = getVLDSTRegisterUpdateOpcode(Opc);
   2024       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
   2025       // check for that explicitly too. Horribly hacky, but temporary.
   2026       if  (!isa<ConstantSDNode>(Inc.getNode()))
   2027         Ops.push_back(Inc);
   2028       else if (NumVecs > 2 && !isVSTfixed(Opc))
   2029         Ops.push_back(Reg0);
   2030     }
   2031     Ops.push_back(SrcReg);
   2032     Ops.push_back(Pred);
   2033     Ops.push_back(Reg0);
   2034     Ops.push_back(Chain);
   2035     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   2036 
   2037     // Transfer memoperands.
   2038     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
   2039 
   2040     return VSt;
   2041   }
   2042 
   2043   // Otherwise, quad registers are stored with two separate instructions,
   2044   // where one stores the even registers and the other stores the odd registers.
   2045 
   2046   // Form the QQQQ REG_SEQUENCE.
   2047   SDValue V0 = N->getOperand(Vec0Idx + 0);
   2048   SDValue V1 = N->getOperand(Vec0Idx + 1);
   2049   SDValue V2 = N->getOperand(Vec0Idx + 2);
   2050   SDValue V3 = (NumVecs == 3)
   2051     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
   2052     : N->getOperand(Vec0Idx + 3);
   2053   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
   2054 
   2055   // Store the even D registers.  This is always an updating store, so that it
   2056   // provides the address to the second store for the odd subregs.
   2057   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
   2058   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
   2059                                         MemAddr.getValueType(),
   2060                                         MVT::Other, OpsA);
   2061   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
   2062   Chain = SDValue(VStA, 1);
   2063 
   2064   // Store the odd D registers.
   2065   Ops.push_back(SDValue(VStA, 0));
   2066   Ops.push_back(Align);
   2067   if (isUpdating) {
   2068     SDValue Inc = N->getOperand(AddrOpIdx + 1);
   2069     assert(isa<ConstantSDNode>(Inc.getNode()) &&
   2070            "only constant post-increment update allowed for VST3/4");
   2071     (void)Inc;
   2072     Ops.push_back(Reg0);
   2073   }
   2074   Ops.push_back(RegSeq);
   2075   Ops.push_back(Pred);
   2076   Ops.push_back(Reg0);
   2077   Ops.push_back(Chain);
   2078   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
   2079                                         Ops);
   2080   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
   2081   return VStB;
   2082 }
   2083 
   2084 SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad,
   2085                                          bool isUpdating, unsigned NumVecs,
   2086                                          const uint16_t *DOpcodes,
   2087                                          const uint16_t *QOpcodes) {
   2088   assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
   2089   SDLoc dl(N);
   2090 
   2091   SDValue MemAddr, Align;
   2092   unsigned AddrOpIdx = isUpdating ? 1 : 2;
   2093   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
   2094   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
   2095     return nullptr;
   2096 
   2097   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2098   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   2099 
   2100   SDValue Chain = N->getOperand(0);
   2101   unsigned Lane =
   2102     cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
   2103   EVT VT = N->getOperand(Vec0Idx).getValueType();
   2104   bool is64BitVector = VT.is64BitVector();
   2105 
   2106   unsigned Alignment = 0;
   2107   if (NumVecs != 3) {
   2108     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
   2109     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
   2110     if (Alignment > NumBytes)
   2111       Alignment = NumBytes;
   2112     if (Alignment < 8 && Alignment < NumBytes)
   2113       Alignment = 0;
   2114     // Alignment must be a power of two; make sure of that.
   2115     Alignment = (Alignment & -Alignment);
   2116     if (Alignment == 1)
   2117       Alignment = 0;
   2118   }
   2119   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
   2120 
   2121   unsigned OpcodeIndex;
   2122   switch (VT.getSimpleVT().SimpleTy) {
   2123   default: llvm_unreachable("unhandled vld/vst lane type");
   2124     // Double-register operations:
   2125   case MVT::v8i8:  OpcodeIndex = 0; break;
   2126   case MVT::v4i16: OpcodeIndex = 1; break;
   2127   case MVT::v2f32:
   2128   case MVT::v2i32: OpcodeIndex = 2; break;
   2129     // Quad-register operations:
   2130   case MVT::v8i16: OpcodeIndex = 0; break;
   2131   case MVT::v4f32:
   2132   case MVT::v4i32: OpcodeIndex = 1; break;
   2133   }
   2134 
   2135   std::vector<EVT> ResTys;
   2136   if (IsLoad) {
   2137     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
   2138     if (!is64BitVector)
   2139       ResTyElts *= 2;
   2140     ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
   2141                                       MVT::i64, ResTyElts));
   2142   }
   2143   if (isUpdating)
   2144     ResTys.push_back(MVT::i32);
   2145   ResTys.push_back(MVT::Other);
   2146 
   2147   SDValue Pred = getAL(CurDAG, dl);
   2148   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2149 
   2150   SmallVector<SDValue, 8> Ops;
   2151   Ops.push_back(MemAddr);
   2152   Ops.push_back(Align);
   2153   if (isUpdating) {
   2154     SDValue Inc = N->getOperand(AddrOpIdx + 1);
   2155     Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
   2156   }
   2157 
   2158   SDValue SuperReg;
   2159   SDValue V0 = N->getOperand(Vec0Idx + 0);
   2160   SDValue V1 = N->getOperand(Vec0Idx + 1);
   2161   if (NumVecs == 2) {
   2162     if (is64BitVector)
   2163       SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
   2164     else
   2165       SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
   2166   } else {
   2167     SDValue V2 = N->getOperand(Vec0Idx + 2);
   2168     SDValue V3 = (NumVecs == 3)
   2169       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
   2170       : N->getOperand(Vec0Idx + 3);
   2171     if (is64BitVector)
   2172       SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
   2173     else
   2174       SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
   2175   }
   2176   Ops.push_back(SuperReg);
   2177   Ops.push_back(getI32Imm(Lane, dl));
   2178   Ops.push_back(Pred);
   2179   Ops.push_back(Reg0);
   2180   Ops.push_back(Chain);
   2181 
   2182   unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
   2183                                   QOpcodes[OpcodeIndex]);
   2184   SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   2185   cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
   2186   if (!IsLoad)
   2187     return VLdLn;
   2188 
   2189   // Extract the subregisters.
   2190   SuperReg = SDValue(VLdLn, 0);
   2191   assert(ARM::dsub_7 == ARM::dsub_0+7 &&
   2192          ARM::qsub_3 == ARM::qsub_0+3 && "Unexpected subreg numbering");
   2193   unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
   2194   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
   2195     ReplaceUses(SDValue(N, Vec),
   2196                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
   2197   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
   2198   if (isUpdating)
   2199     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
   2200   return nullptr;
   2201 }
   2202 
   2203 SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating,
   2204                                       unsigned NumVecs,
   2205                                       const uint16_t *Opcodes) {
   2206   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
   2207   SDLoc dl(N);
   2208 
   2209   SDValue MemAddr, Align;
   2210   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
   2211     return nullptr;
   2212 
   2213   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2214   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   2215 
   2216   SDValue Chain = N->getOperand(0);
   2217   EVT VT = N->getValueType(0);
   2218 
   2219   unsigned Alignment = 0;
   2220   if (NumVecs != 3) {
   2221     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
   2222     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
   2223     if (Alignment > NumBytes)
   2224       Alignment = NumBytes;
   2225     if (Alignment < 8 && Alignment < NumBytes)
   2226       Alignment = 0;
   2227     // Alignment must be a power of two; make sure of that.
   2228     Alignment = (Alignment & -Alignment);
   2229     if (Alignment == 1)
   2230       Alignment = 0;
   2231   }
   2232   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
   2233 
   2234   unsigned OpcodeIndex;
   2235   switch (VT.getSimpleVT().SimpleTy) {
   2236   default: llvm_unreachable("unhandled vld-dup type");
   2237   case MVT::v8i8:  OpcodeIndex = 0; break;
   2238   case MVT::v4i16: OpcodeIndex = 1; break;
   2239   case MVT::v2f32:
   2240   case MVT::v2i32: OpcodeIndex = 2; break;
   2241   }
   2242 
   2243   SDValue Pred = getAL(CurDAG, dl);
   2244   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2245   SDValue SuperReg;
   2246   unsigned Opc = Opcodes[OpcodeIndex];
   2247   SmallVector<SDValue, 6> Ops;
   2248   Ops.push_back(MemAddr);
   2249   Ops.push_back(Align);
   2250   if (isUpdating) {
   2251     // fixed-stride update instructions don't have an explicit writeback
   2252     // operand. It's implicit in the opcode itself.
   2253     SDValue Inc = N->getOperand(2);
   2254     if (!isa<ConstantSDNode>(Inc.getNode()))
   2255       Ops.push_back(Inc);
   2256     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
   2257     else if (NumVecs > 2)
   2258       Ops.push_back(Reg0);
   2259   }
   2260   Ops.push_back(Pred);
   2261   Ops.push_back(Reg0);
   2262   Ops.push_back(Chain);
   2263 
   2264   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
   2265   std::vector<EVT> ResTys;
   2266   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
   2267   if (isUpdating)
   2268     ResTys.push_back(MVT::i32);
   2269   ResTys.push_back(MVT::Other);
   2270   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   2271   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
   2272   SuperReg = SDValue(VLdDup, 0);
   2273 
   2274   // Extract the subregisters.
   2275   assert(ARM::dsub_7 == ARM::dsub_0+7 && "Unexpected subreg numbering");
   2276   unsigned SubIdx = ARM::dsub_0;
   2277   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
   2278     ReplaceUses(SDValue(N, Vec),
   2279                 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
   2280   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
   2281   if (isUpdating)
   2282     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
   2283   return nullptr;
   2284 }
   2285 
   2286 SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
   2287                                     unsigned Opc) {
   2288   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
   2289   SDLoc dl(N);
   2290   EVT VT = N->getValueType(0);
   2291   unsigned FirstTblReg = IsExt ? 2 : 1;
   2292 
   2293   // Form a REG_SEQUENCE to force register allocation.
   2294   SDValue RegSeq;
   2295   SDValue V0 = N->getOperand(FirstTblReg + 0);
   2296   SDValue V1 = N->getOperand(FirstTblReg + 1);
   2297   if (NumVecs == 2)
   2298     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
   2299   else {
   2300     SDValue V2 = N->getOperand(FirstTblReg + 2);
   2301     // If it's a vtbl3, form a quad D-register and leave the last part as
   2302     // an undef.
   2303     SDValue V3 = (NumVecs == 3)
   2304       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
   2305       : N->getOperand(FirstTblReg + 3);
   2306     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
   2307   }
   2308 
   2309   SmallVector<SDValue, 6> Ops;
   2310   if (IsExt)
   2311     Ops.push_back(N->getOperand(1));
   2312   Ops.push_back(RegSeq);
   2313   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
   2314   Ops.push_back(getAL(CurDAG, dl)); // predicate
   2315   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
   2316   return CurDAG->getMachineNode(Opc, dl, VT, Ops);
   2317 }
   2318 
   2319 SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N,
   2320                                                      bool isSigned) {
   2321   if (!Subtarget->hasV6T2Ops())
   2322     return nullptr;
   2323 
   2324   unsigned Opc = isSigned
   2325     ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
   2326     : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
   2327   SDLoc dl(N);
   2328 
   2329   // For unsigned extracts, check for a shift right and mask
   2330   unsigned And_imm = 0;
   2331   if (N->getOpcode() == ISD::AND) {
   2332     if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {
   2333 
   2334       // The immediate is a mask of the low bits iff imm & (imm+1) == 0
   2335       if (And_imm & (And_imm + 1))
   2336         return nullptr;
   2337 
   2338       unsigned Srl_imm = 0;
   2339       if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
   2340                                 Srl_imm)) {
   2341         assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
   2342 
   2343         // Note: The width operand is encoded as width-1.
   2344         unsigned Width = countTrailingOnes(And_imm) - 1;
   2345         unsigned LSB = Srl_imm;
   2346 
   2347         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2348 
   2349         if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
   2350           // It's cheaper to use a right shift to extract the top bits.
   2351           if (Subtarget->isThumb()) {
   2352             Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
   2353             SDValue Ops[] = { N->getOperand(0).getOperand(0),
   2354                               CurDAG->getTargetConstant(LSB, dl, MVT::i32),
   2355                               getAL(CurDAG, dl), Reg0, Reg0 };
   2356             return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
   2357           }
   2358 
   2359           // ARM models shift instructions as MOVsi with shifter operand.
   2360           ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
   2361           SDValue ShOpc =
   2362             CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
   2363                                       MVT::i32);
   2364           SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
   2365                             getAL(CurDAG, dl), Reg0, Reg0 };
   2366           return CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
   2367         }
   2368 
   2369         SDValue Ops[] = { N->getOperand(0).getOperand(0),
   2370                           CurDAG->getTargetConstant(LSB, dl, MVT::i32),
   2371                           CurDAG->getTargetConstant(Width, dl, MVT::i32),
   2372                           getAL(CurDAG, dl), Reg0 };
   2373         return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
   2374       }
   2375     }
   2376     return nullptr;
   2377   }
   2378 
   2379   // Otherwise, we're looking for a shift of a shift
   2380   unsigned Shl_imm = 0;
   2381   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
   2382     assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
   2383     unsigned Srl_imm = 0;
   2384     if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
   2385       assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
   2386       // Note: The width operand is encoded as width-1.
   2387       unsigned Width = 32 - Srl_imm - 1;
   2388       int LSB = Srl_imm - Shl_imm;
   2389       if (LSB < 0)
   2390         return nullptr;
   2391       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2392       SDValue Ops[] = { N->getOperand(0).getOperand(0),
   2393                         CurDAG->getTargetConstant(LSB, dl, MVT::i32),
   2394                         CurDAG->getTargetConstant(Width, dl, MVT::i32),
   2395                         getAL(CurDAG, dl), Reg0 };
   2396       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
   2397     }
   2398   }
   2399 
   2400   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
   2401     unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
   2402     unsigned LSB = 0;
   2403     if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
   2404         !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
   2405       return nullptr;
   2406 
   2407     if (LSB + Width > 32)
   2408       return nullptr;
   2409 
   2410     SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2411     SDValue Ops[] = { N->getOperand(0).getOperand(0),
   2412                       CurDAG->getTargetConstant(LSB, dl, MVT::i32),
   2413                       CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
   2414                       getAL(CurDAG, dl), Reg0 };
   2415     return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
   2416   }
   2417 
   2418   return nullptr;
   2419 }
   2420 
   2421 /// Target-specific DAG combining for ISD::XOR.
   2422 /// Target-independent combining lowers SELECT_CC nodes of the form
   2423 /// select_cc setg[ge] X,  0,  X, -X
   2424 /// select_cc setgt    X, -1,  X, -X
   2425 /// select_cc setl[te] X,  0, -X,  X
   2426 /// select_cc setlt    X,  1, -X,  X
   2427 /// which represent Integer ABS into:
   2428 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
   2429 /// ARM instruction selection detects the latter and matches it to
   2430 /// ARM::ABS or ARM::t2ABS machine node.
   2431 SDNode *ARMDAGToDAGISel::SelectABSOp(SDNode *N){
   2432   SDValue XORSrc0 = N->getOperand(0);
   2433   SDValue XORSrc1 = N->getOperand(1);
   2434   EVT VT = N->getValueType(0);
   2435 
   2436   if (Subtarget->isThumb1Only())
   2437     return nullptr;
   2438 
   2439   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
   2440     return nullptr;
   2441 
   2442   SDValue ADDSrc0 = XORSrc0.getOperand(0);
   2443   SDValue ADDSrc1 = XORSrc0.getOperand(1);
   2444   SDValue SRASrc0 = XORSrc1.getOperand(0);
   2445   SDValue SRASrc1 = XORSrc1.getOperand(1);
   2446   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
   2447   EVT XType = SRASrc0.getValueType();
   2448   unsigned Size = XType.getSizeInBits() - 1;
   2449 
   2450   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
   2451       XType.isInteger() && SRAConstant != nullptr &&
   2452       Size == SRAConstant->getZExtValue()) {
   2453     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
   2454     return CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
   2455   }
   2456 
   2457   return nullptr;
   2458 }
   2459 
   2460 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
   2461   // The only time a CONCAT_VECTORS operation can have legal types is when
   2462   // two 64-bit vectors are concatenated to a 128-bit vector.
   2463   EVT VT = N->getValueType(0);
   2464   if (!VT.is128BitVector() || N->getNumOperands() != 2)
   2465     llvm_unreachable("unexpected CONCAT_VECTORS");
   2466   return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1));
   2467 }
   2468 
   2469 SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
   2470   SDLoc dl(N);
   2471 
   2472   if (N->isMachineOpcode()) {
   2473     N->setNodeId(-1);
   2474     return nullptr;   // Already selected.
   2475   }
   2476 
   2477   switch (N->getOpcode()) {
   2478   default: break;
   2479   case ISD::WRITE_REGISTER: {
   2480     SDNode *ResNode = SelectWriteRegister(N);
   2481     if (ResNode)
   2482       return ResNode;
   2483     break;
   2484   }
   2485   case ISD::READ_REGISTER: {
   2486     SDNode *ResNode = SelectReadRegister(N);
   2487     if (ResNode)
   2488       return ResNode;
   2489     break;
   2490   }
   2491   case ISD::INLINEASM: {
   2492     SDNode *ResNode = SelectInlineAsm(N);
   2493     if (ResNode)
   2494       return ResNode;
   2495     break;
   2496   }
   2497   case ISD::XOR: {
   2498     // Select special operations if XOR node forms integer ABS pattern
   2499     SDNode *ResNode = SelectABSOp(N);
   2500     if (ResNode)
   2501       return ResNode;
   2502     // Other cases are autogenerated.
   2503     break;
   2504   }
   2505   case ISD::Constant: {
   2506     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
   2507     // If we can't materialize the constant we need to use a literal pool
   2508     if (ConstantMaterializationCost(Val) > 2) {
   2509       SDValue CPIdx = CurDAG->getTargetConstantPool(
   2510           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
   2511           TLI->getPointerTy(CurDAG->getDataLayout()));
   2512 
   2513       SDNode *ResNode;
   2514       if (Subtarget->isThumb()) {
   2515         SDValue Pred = getAL(CurDAG, dl);
   2516         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   2517         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
   2518         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
   2519                                          Ops);
   2520       } else {
   2521         SDValue Ops[] = {
   2522           CPIdx,
   2523           CurDAG->getTargetConstant(0, dl, MVT::i32),
   2524           getAL(CurDAG, dl),
   2525           CurDAG->getRegister(0, MVT::i32),
   2526           CurDAG->getEntryNode()
   2527         };
   2528         ResNode=CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
   2529                                        Ops);
   2530       }
   2531       ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
   2532       return nullptr;
   2533     }
   2534 
   2535     // Other cases are autogenerated.
   2536     break;
   2537   }
   2538   case ISD::FrameIndex: {
   2539     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
   2540     int FI = cast<FrameIndexSDNode>(N)->getIndex();
   2541     SDValue TFI = CurDAG->getTargetFrameIndex(
   2542         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   2543     if (Subtarget->isThumb1Only()) {
   2544       // Set the alignment of the frame object to 4, to avoid having to generate
   2545       // more than one ADD
   2546       MachineFrameInfo *MFI = MF->getFrameInfo();
   2547       if (MFI->getObjectAlignment(FI) < 4)
   2548         MFI->setObjectAlignment(FI, 4);
   2549       return CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
   2550                                   CurDAG->getTargetConstant(0, dl, MVT::i32));
   2551     } else {
   2552       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
   2553                       ARM::t2ADDri : ARM::ADDri);
   2554       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
   2555                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
   2556                         CurDAG->getRegister(0, MVT::i32) };
   2557       return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
   2558     }
   2559   }
   2560   case ISD::SRL:
   2561     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
   2562       return I;
   2563     break;
   2564   case ISD::SIGN_EXTEND_INREG:
   2565   case ISD::SRA:
   2566     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, true))
   2567       return I;
   2568     break;
   2569   case ISD::MUL:
   2570     if (Subtarget->isThumb1Only())
   2571       break;
   2572     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
   2573       unsigned RHSV = C->getZExtValue();
   2574       if (!RHSV) break;
   2575       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
   2576         unsigned ShImm = Log2_32(RHSV-1);
   2577         if (ShImm >= 32)
   2578           break;
   2579         SDValue V = N->getOperand(0);
   2580         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
   2581         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
   2582         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2583         if (Subtarget->isThumb()) {
   2584           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
   2585           return CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
   2586         } else {
   2587           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
   2588                             Reg0 };
   2589           return CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
   2590         }
   2591       }
   2592       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
   2593         unsigned ShImm = Log2_32(RHSV+1);
   2594         if (ShImm >= 32)
   2595           break;
   2596         SDValue V = N->getOperand(0);
   2597         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
   2598         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
   2599         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2600         if (Subtarget->isThumb()) {
   2601           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
   2602           return CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
   2603         } else {
   2604           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
   2605                             Reg0 };
   2606           return CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
   2607         }
   2608       }
   2609     }
   2610     break;
   2611   case ISD::AND: {
   2612     // Check for unsigned bitfield extract
   2613     if (SDNode *I = SelectV6T2BitfieldExtractOp(N, false))
   2614       return I;
   2615 
   2616     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
   2617     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
   2618     // are entirely contributed by c2 and lower 16-bits are entirely contributed
   2619     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
   2620     // Select it to: "movt x, ((c1 & 0xffff) >> 16)
   2621     EVT VT = N->getValueType(0);
   2622     if (VT != MVT::i32)
   2623       break;
   2624     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
   2625       ? ARM::t2MOVTi16
   2626       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
   2627     if (!Opc)
   2628       break;
   2629     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   2630     ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   2631     if (!N1C)
   2632       break;
   2633     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
   2634       SDValue N2 = N0.getOperand(1);
   2635       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
   2636       if (!N2C)
   2637         break;
   2638       unsigned N1CVal = N1C->getZExtValue();
   2639       unsigned N2CVal = N2C->getZExtValue();
   2640       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
   2641           (N1CVal & 0xffffU) == 0xffffU &&
   2642           (N2CVal & 0xffffU) == 0x0U) {
   2643         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
   2644                                                   dl, MVT::i32);
   2645         SDValue Ops[] = { N0.getOperand(0), Imm16,
   2646                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
   2647         return CurDAG->getMachineNode(Opc, dl, VT, Ops);
   2648       }
   2649     }
   2650     break;
   2651   }
   2652   case ARMISD::VMOVRRD:
   2653     return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
   2654                                   N->getOperand(0), getAL(CurDAG, dl),
   2655                                   CurDAG->getRegister(0, MVT::i32));
   2656   case ISD::UMUL_LOHI: {
   2657     if (Subtarget->isThumb1Only())
   2658       break;
   2659     if (Subtarget->isThumb()) {
   2660       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2661                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
   2662       return CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops);
   2663     } else {
   2664       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2665                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
   2666                         CurDAG->getRegister(0, MVT::i32) };
   2667       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
   2668                                     ARM::UMULL : ARM::UMULLv5,
   2669                                     dl, MVT::i32, MVT::i32, Ops);
   2670     }
   2671   }
   2672   case ISD::SMUL_LOHI: {
   2673     if (Subtarget->isThumb1Only())
   2674       break;
   2675     if (Subtarget->isThumb()) {
   2676       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2677                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
   2678       return CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops);
   2679     } else {
   2680       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2681                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
   2682                         CurDAG->getRegister(0, MVT::i32) };
   2683       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
   2684                                     ARM::SMULL : ARM::SMULLv5,
   2685                                     dl, MVT::i32, MVT::i32, Ops);
   2686     }
   2687   }
   2688   case ARMISD::UMLAL:{
   2689     if (Subtarget->isThumb()) {
   2690       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   2691                         N->getOperand(3), getAL(CurDAG, dl),
   2692                         CurDAG->getRegister(0, MVT::i32)};
   2693       return CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops);
   2694     }else{
   2695       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   2696                         N->getOperand(3), getAL(CurDAG, dl),
   2697                         CurDAG->getRegister(0, MVT::i32),
   2698                         CurDAG->getRegister(0, MVT::i32) };
   2699       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
   2700                                       ARM::UMLAL : ARM::UMLALv5,
   2701                                       dl, MVT::i32, MVT::i32, Ops);
   2702     }
   2703   }
   2704   case ARMISD::SMLAL:{
   2705     if (Subtarget->isThumb()) {
   2706       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   2707                         N->getOperand(3), getAL(CurDAG, dl),
   2708                         CurDAG->getRegister(0, MVT::i32)};
   2709       return CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops);
   2710     }else{
   2711       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   2712                         N->getOperand(3), getAL(CurDAG, dl),
   2713                         CurDAG->getRegister(0, MVT::i32),
   2714                         CurDAG->getRegister(0, MVT::i32) };
   2715       return CurDAG->getMachineNode(Subtarget->hasV6Ops() ?
   2716                                       ARM::SMLAL : ARM::SMLALv5,
   2717                                       dl, MVT::i32, MVT::i32, Ops);
   2718     }
   2719   }
   2720   case ISD::LOAD: {
   2721     SDNode *ResNode = nullptr;
   2722     if (Subtarget->isThumb() && Subtarget->hasThumb2())
   2723       ResNode = SelectT2IndexedLoad(N);
   2724     else
   2725       ResNode = SelectARMIndexedLoad(N);
   2726     if (ResNode)
   2727       return ResNode;
   2728     // Other cases are autogenerated.
   2729     break;
   2730   }
   2731   case ARMISD::BRCOND: {
   2732     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
   2733     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
   2734     // Pattern complexity = 6  cost = 1  size = 0
   2735 
   2736     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
   2737     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
   2738     // Pattern complexity = 6  cost = 1  size = 0
   2739 
   2740     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
   2741     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
   2742     // Pattern complexity = 6  cost = 1  size = 0
   2743 
   2744     unsigned Opc = Subtarget->isThumb() ?
   2745       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
   2746     SDValue Chain = N->getOperand(0);
   2747     SDValue N1 = N->getOperand(1);
   2748     SDValue N2 = N->getOperand(2);
   2749     SDValue N3 = N->getOperand(3);
   2750     SDValue InFlag = N->getOperand(4);
   2751     assert(N1.getOpcode() == ISD::BasicBlock);
   2752     assert(N2.getOpcode() == ISD::Constant);
   2753     assert(N3.getOpcode() == ISD::Register);
   2754 
   2755     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
   2756                                cast<ConstantSDNode>(N2)->getZExtValue()), dl,
   2757                                MVT::i32);
   2758     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
   2759     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
   2760                                              MVT::Glue, Ops);
   2761     Chain = SDValue(ResNode, 0);
   2762     if (N->getNumValues() == 2) {
   2763       InFlag = SDValue(ResNode, 1);
   2764       ReplaceUses(SDValue(N, 1), InFlag);
   2765     }
   2766     ReplaceUses(SDValue(N, 0),
   2767                 SDValue(Chain.getNode(), Chain.getResNo()));
   2768     return nullptr;
   2769   }
   2770   case ARMISD::VZIP: {
   2771     unsigned Opc = 0;
   2772     EVT VT = N->getValueType(0);
   2773     switch (VT.getSimpleVT().SimpleTy) {
   2774     default: return nullptr;
   2775     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
   2776     case MVT::v4i16: Opc = ARM::VZIPd16; break;
   2777     case MVT::v2f32:
   2778     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
   2779     case MVT::v2i32: Opc = ARM::VTRNd32; break;
   2780     case MVT::v16i8: Opc = ARM::VZIPq8; break;
   2781     case MVT::v8i16: Opc = ARM::VZIPq16; break;
   2782     case MVT::v4f32:
   2783     case MVT::v4i32: Opc = ARM::VZIPq32; break;
   2784     }
   2785     SDValue Pred = getAL(CurDAG, dl);
   2786     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   2787     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
   2788     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   2789   }
   2790   case ARMISD::VUZP: {
   2791     unsigned Opc = 0;
   2792     EVT VT = N->getValueType(0);
   2793     switch (VT.getSimpleVT().SimpleTy) {
   2794     default: return nullptr;
   2795     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
   2796     case MVT::v4i16: Opc = ARM::VUZPd16; break;
   2797     case MVT::v2f32:
   2798     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
   2799     case MVT::v2i32: Opc = ARM::VTRNd32; break;
   2800     case MVT::v16i8: Opc = ARM::VUZPq8; break;
   2801     case MVT::v8i16: Opc = ARM::VUZPq16; break;
   2802     case MVT::v4f32:
   2803     case MVT::v4i32: Opc = ARM::VUZPq32; break;
   2804     }
   2805     SDValue Pred = getAL(CurDAG, dl);
   2806     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   2807     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
   2808     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   2809   }
   2810   case ARMISD::VTRN: {
   2811     unsigned Opc = 0;
   2812     EVT VT = N->getValueType(0);
   2813     switch (VT.getSimpleVT().SimpleTy) {
   2814     default: return nullptr;
   2815     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
   2816     case MVT::v4i16: Opc = ARM::VTRNd16; break;
   2817     case MVT::v2f32:
   2818     case MVT::v2i32: Opc = ARM::VTRNd32; break;
   2819     case MVT::v16i8: Opc = ARM::VTRNq8; break;
   2820     case MVT::v8i16: Opc = ARM::VTRNq16; break;
   2821     case MVT::v4f32:
   2822     case MVT::v4i32: Opc = ARM::VTRNq32; break;
   2823     }
   2824     SDValue Pred = getAL(CurDAG, dl);
   2825     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   2826     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
   2827     return CurDAG->getMachineNode(Opc, dl, VT, VT, Ops);
   2828   }
   2829   case ARMISD::BUILD_VECTOR: {
   2830     EVT VecVT = N->getValueType(0);
   2831     EVT EltVT = VecVT.getVectorElementType();
   2832     unsigned NumElts = VecVT.getVectorNumElements();
   2833     if (EltVT == MVT::f64) {
   2834       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
   2835       return createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
   2836     }
   2837     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
   2838     if (NumElts == 2)
   2839       return createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1));
   2840     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
   2841     return createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
   2842                      N->getOperand(2), N->getOperand(3));
   2843   }
   2844 
   2845   case ARMISD::VLD2DUP: {
   2846     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
   2847                                         ARM::VLD2DUPd32 };
   2848     return SelectVLDDup(N, false, 2, Opcodes);
   2849   }
   2850 
   2851   case ARMISD::VLD3DUP: {
   2852     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
   2853                                         ARM::VLD3DUPd16Pseudo,
   2854                                         ARM::VLD3DUPd32Pseudo };
   2855     return SelectVLDDup(N, false, 3, Opcodes);
   2856   }
   2857 
   2858   case ARMISD::VLD4DUP: {
   2859     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
   2860                                         ARM::VLD4DUPd16Pseudo,
   2861                                         ARM::VLD4DUPd32Pseudo };
   2862     return SelectVLDDup(N, false, 4, Opcodes);
   2863   }
   2864 
   2865   case ARMISD::VLD2DUP_UPD: {
   2866     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
   2867                                         ARM::VLD2DUPd16wb_fixed,
   2868                                         ARM::VLD2DUPd32wb_fixed };
   2869     return SelectVLDDup(N, true, 2, Opcodes);
   2870   }
   2871 
   2872   case ARMISD::VLD3DUP_UPD: {
   2873     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
   2874                                         ARM::VLD3DUPd16Pseudo_UPD,
   2875                                         ARM::VLD3DUPd32Pseudo_UPD };
   2876     return SelectVLDDup(N, true, 3, Opcodes);
   2877   }
   2878 
   2879   case ARMISD::VLD4DUP_UPD: {
   2880     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
   2881                                         ARM::VLD4DUPd16Pseudo_UPD,
   2882                                         ARM::VLD4DUPd32Pseudo_UPD };
   2883     return SelectVLDDup(N, true, 4, Opcodes);
   2884   }
   2885 
   2886   case ARMISD::VLD1_UPD: {
   2887     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
   2888                                          ARM::VLD1d16wb_fixed,
   2889                                          ARM::VLD1d32wb_fixed,
   2890                                          ARM::VLD1d64wb_fixed };
   2891     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
   2892                                          ARM::VLD1q16wb_fixed,
   2893                                          ARM::VLD1q32wb_fixed,
   2894                                          ARM::VLD1q64wb_fixed };
   2895     return SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
   2896   }
   2897 
   2898   case ARMISD::VLD2_UPD: {
   2899     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
   2900                                          ARM::VLD2d16wb_fixed,
   2901                                          ARM::VLD2d32wb_fixed,
   2902                                          ARM::VLD1q64wb_fixed};
   2903     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
   2904                                          ARM::VLD2q16PseudoWB_fixed,
   2905                                          ARM::VLD2q32PseudoWB_fixed };
   2906     return SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
   2907   }
   2908 
   2909   case ARMISD::VLD3_UPD: {
   2910     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
   2911                                          ARM::VLD3d16Pseudo_UPD,
   2912                                          ARM::VLD3d32Pseudo_UPD,
   2913                                          ARM::VLD1d64TPseudoWB_fixed};
   2914     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
   2915                                           ARM::VLD3q16Pseudo_UPD,
   2916                                           ARM::VLD3q32Pseudo_UPD };
   2917     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
   2918                                           ARM::VLD3q16oddPseudo_UPD,
   2919                                           ARM::VLD3q32oddPseudo_UPD };
   2920     return SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
   2921   }
   2922 
   2923   case ARMISD::VLD4_UPD: {
   2924     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
   2925                                          ARM::VLD4d16Pseudo_UPD,
   2926                                          ARM::VLD4d32Pseudo_UPD,
   2927                                          ARM::VLD1d64QPseudoWB_fixed};
   2928     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
   2929                                           ARM::VLD4q16Pseudo_UPD,
   2930                                           ARM::VLD4q32Pseudo_UPD };
   2931     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
   2932                                           ARM::VLD4q16oddPseudo_UPD,
   2933                                           ARM::VLD4q32oddPseudo_UPD };
   2934     return SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
   2935   }
   2936 
   2937   case ARMISD::VLD2LN_UPD: {
   2938     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
   2939                                          ARM::VLD2LNd16Pseudo_UPD,
   2940                                          ARM::VLD2LNd32Pseudo_UPD };
   2941     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
   2942                                          ARM::VLD2LNq32Pseudo_UPD };
   2943     return SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
   2944   }
   2945 
   2946   case ARMISD::VLD3LN_UPD: {
   2947     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
   2948                                          ARM::VLD3LNd16Pseudo_UPD,
   2949                                          ARM::VLD3LNd32Pseudo_UPD };
   2950     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
   2951                                          ARM::VLD3LNq32Pseudo_UPD };
   2952     return SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
   2953   }
   2954 
   2955   case ARMISD::VLD4LN_UPD: {
   2956     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
   2957                                          ARM::VLD4LNd16Pseudo_UPD,
   2958                                          ARM::VLD4LNd32Pseudo_UPD };
   2959     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
   2960                                          ARM::VLD4LNq32Pseudo_UPD };
   2961     return SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
   2962   }
   2963 
   2964   case ARMISD::VST1_UPD: {
   2965     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
   2966                                          ARM::VST1d16wb_fixed,
   2967                                          ARM::VST1d32wb_fixed,
   2968                                          ARM::VST1d64wb_fixed };
   2969     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
   2970                                          ARM::VST1q16wb_fixed,
   2971                                          ARM::VST1q32wb_fixed,
   2972                                          ARM::VST1q64wb_fixed };
   2973     return SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
   2974   }
   2975 
   2976   case ARMISD::VST2_UPD: {
   2977     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
   2978                                          ARM::VST2d16wb_fixed,
   2979                                          ARM::VST2d32wb_fixed,
   2980                                          ARM::VST1q64wb_fixed};
   2981     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
   2982                                          ARM::VST2q16PseudoWB_fixed,
   2983                                          ARM::VST2q32PseudoWB_fixed };
   2984     return SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
   2985   }
   2986 
   2987   case ARMISD::VST3_UPD: {
   2988     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
   2989                                          ARM::VST3d16Pseudo_UPD,
   2990                                          ARM::VST3d32Pseudo_UPD,
   2991                                          ARM::VST1d64TPseudoWB_fixed};
   2992     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
   2993                                           ARM::VST3q16Pseudo_UPD,
   2994                                           ARM::VST3q32Pseudo_UPD };
   2995     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
   2996                                           ARM::VST3q16oddPseudo_UPD,
   2997                                           ARM::VST3q32oddPseudo_UPD };
   2998     return SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
   2999   }
   3000 
   3001   case ARMISD::VST4_UPD: {
   3002     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
   3003                                          ARM::VST4d16Pseudo_UPD,
   3004                                          ARM::VST4d32Pseudo_UPD,
   3005                                          ARM::VST1d64QPseudoWB_fixed};
   3006     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
   3007                                           ARM::VST4q16Pseudo_UPD,
   3008                                           ARM::VST4q32Pseudo_UPD };
   3009     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
   3010                                           ARM::VST4q16oddPseudo_UPD,
   3011                                           ARM::VST4q32oddPseudo_UPD };
   3012     return SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
   3013   }
   3014 
   3015   case ARMISD::VST2LN_UPD: {
   3016     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
   3017                                          ARM::VST2LNd16Pseudo_UPD,
   3018                                          ARM::VST2LNd32Pseudo_UPD };
   3019     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
   3020                                          ARM::VST2LNq32Pseudo_UPD };
   3021     return SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
   3022   }
   3023 
   3024   case ARMISD::VST3LN_UPD: {
   3025     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
   3026                                          ARM::VST3LNd16Pseudo_UPD,
   3027                                          ARM::VST3LNd32Pseudo_UPD };
   3028     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
   3029                                          ARM::VST3LNq32Pseudo_UPD };
   3030     return SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
   3031   }
   3032 
   3033   case ARMISD::VST4LN_UPD: {
   3034     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
   3035                                          ARM::VST4LNd16Pseudo_UPD,
   3036                                          ARM::VST4LNd32Pseudo_UPD };
   3037     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
   3038                                          ARM::VST4LNq32Pseudo_UPD };
   3039     return SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
   3040   }
   3041 
   3042   case ISD::INTRINSIC_VOID:
   3043   case ISD::INTRINSIC_W_CHAIN: {
   3044     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   3045     switch (IntNo) {
   3046     default:
   3047       break;
   3048 
   3049     case Intrinsic::arm_ldaexd:
   3050     case Intrinsic::arm_ldrexd: {
   3051       SDLoc dl(N);
   3052       SDValue Chain = N->getOperand(0);
   3053       SDValue MemAddr = N->getOperand(2);
   3054       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
   3055 
   3056       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
   3057       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
   3058                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
   3059 
   3060       // arm_ldrexd returns a i64 value in {i32, i32}
   3061       std::vector<EVT> ResTys;
   3062       if (isThumb) {
   3063         ResTys.push_back(MVT::i32);
   3064         ResTys.push_back(MVT::i32);
   3065       } else
   3066         ResTys.push_back(MVT::Untyped);
   3067       ResTys.push_back(MVT::Other);
   3068 
   3069       // Place arguments in the right order.
   3070       SmallVector<SDValue, 7> Ops;
   3071       Ops.push_back(MemAddr);
   3072       Ops.push_back(getAL(CurDAG, dl));
   3073       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   3074       Ops.push_back(Chain);
   3075       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
   3076       // Transfer memoperands.
   3077       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   3078       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   3079       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
   3080 
   3081       // Remap uses.
   3082       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
   3083       if (!SDValue(N, 0).use_empty()) {
   3084         SDValue Result;
   3085         if (isThumb)
   3086           Result = SDValue(Ld, 0);
   3087         else {
   3088           SDValue SubRegIdx =
   3089             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
   3090           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
   3091               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
   3092           Result = SDValue(ResNode,0);
   3093         }
   3094         ReplaceUses(SDValue(N, 0), Result);
   3095       }
   3096       if (!SDValue(N, 1).use_empty()) {
   3097         SDValue Result;
   3098         if (isThumb)
   3099           Result = SDValue(Ld, 1);
   3100         else {
   3101           SDValue SubRegIdx =
   3102             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
   3103           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
   3104               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
   3105           Result = SDValue(ResNode,0);
   3106         }
   3107         ReplaceUses(SDValue(N, 1), Result);
   3108       }
   3109       ReplaceUses(SDValue(N, 2), OutChain);
   3110       return nullptr;
   3111     }
   3112     case Intrinsic::arm_stlexd:
   3113     case Intrinsic::arm_strexd: {
   3114       SDLoc dl(N);
   3115       SDValue Chain = N->getOperand(0);
   3116       SDValue Val0 = N->getOperand(2);
   3117       SDValue Val1 = N->getOperand(3);
   3118       SDValue MemAddr = N->getOperand(4);
   3119 
   3120       // Store exclusive double return a i32 value which is the return status
   3121       // of the issued store.
   3122       const EVT ResTys[] = {MVT::i32, MVT::Other};
   3123 
   3124       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
   3125       // Place arguments in the right order.
   3126       SmallVector<SDValue, 7> Ops;
   3127       if (isThumb) {
   3128         Ops.push_back(Val0);
   3129         Ops.push_back(Val1);
   3130       } else
   3131         // arm_strexd uses GPRPair.
   3132         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
   3133       Ops.push_back(MemAddr);
   3134       Ops.push_back(getAL(CurDAG, dl));
   3135       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   3136       Ops.push_back(Chain);
   3137 
   3138       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
   3139       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
   3140                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
   3141 
   3142       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
   3143       // Transfer memoperands.
   3144       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   3145       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   3146       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   3147 
   3148       return St;
   3149     }
   3150 
   3151     case Intrinsic::arm_neon_vld1: {
   3152       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
   3153                                            ARM::VLD1d32, ARM::VLD1d64 };
   3154       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
   3155                                            ARM::VLD1q32, ARM::VLD1q64};
   3156       return SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
   3157     }
   3158 
   3159     case Intrinsic::arm_neon_vld2: {
   3160       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
   3161                                            ARM::VLD2d32, ARM::VLD1q64 };
   3162       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
   3163                                            ARM::VLD2q32Pseudo };
   3164       return SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
   3165     }
   3166 
   3167     case Intrinsic::arm_neon_vld3: {
   3168       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
   3169                                            ARM::VLD3d16Pseudo,
   3170                                            ARM::VLD3d32Pseudo,
   3171                                            ARM::VLD1d64TPseudo };
   3172       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
   3173                                             ARM::VLD3q16Pseudo_UPD,
   3174                                             ARM::VLD3q32Pseudo_UPD };
   3175       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
   3176                                             ARM::VLD3q16oddPseudo,
   3177                                             ARM::VLD3q32oddPseudo };
   3178       return SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
   3179     }
   3180 
   3181     case Intrinsic::arm_neon_vld4: {
   3182       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
   3183                                            ARM::VLD4d16Pseudo,
   3184                                            ARM::VLD4d32Pseudo,
   3185                                            ARM::VLD1d64QPseudo };
   3186       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
   3187                                             ARM::VLD4q16Pseudo_UPD,
   3188                                             ARM::VLD4q32Pseudo_UPD };
   3189       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
   3190                                             ARM::VLD4q16oddPseudo,
   3191                                             ARM::VLD4q32oddPseudo };
   3192       return SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
   3193     }
   3194 
   3195     case Intrinsic::arm_neon_vld2lane: {
   3196       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
   3197                                            ARM::VLD2LNd16Pseudo,
   3198                                            ARM::VLD2LNd32Pseudo };
   3199       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
   3200                                            ARM::VLD2LNq32Pseudo };
   3201       return SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
   3202     }
   3203 
   3204     case Intrinsic::arm_neon_vld3lane: {
   3205       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
   3206                                            ARM::VLD3LNd16Pseudo,
   3207                                            ARM::VLD3LNd32Pseudo };
   3208       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
   3209                                            ARM::VLD3LNq32Pseudo };
   3210       return SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
   3211     }
   3212 
   3213     case Intrinsic::arm_neon_vld4lane: {
   3214       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
   3215                                            ARM::VLD4LNd16Pseudo,
   3216                                            ARM::VLD4LNd32Pseudo };
   3217       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
   3218                                            ARM::VLD4LNq32Pseudo };
   3219       return SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
   3220     }
   3221 
   3222     case Intrinsic::arm_neon_vst1: {
   3223       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
   3224                                            ARM::VST1d32, ARM::VST1d64 };
   3225       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
   3226                                            ARM::VST1q32, ARM::VST1q64 };
   3227       return SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
   3228     }
   3229 
   3230     case Intrinsic::arm_neon_vst2: {
   3231       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
   3232                                            ARM::VST2d32, ARM::VST1q64 };
   3233       static uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
   3234                                      ARM::VST2q32Pseudo };
   3235       return SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
   3236     }
   3237 
   3238     case Intrinsic::arm_neon_vst3: {
   3239       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
   3240                                            ARM::VST3d16Pseudo,
   3241                                            ARM::VST3d32Pseudo,
   3242                                            ARM::VST1d64TPseudo };
   3243       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
   3244                                             ARM::VST3q16Pseudo_UPD,
   3245                                             ARM::VST3q32Pseudo_UPD };
   3246       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
   3247                                             ARM::VST3q16oddPseudo,
   3248                                             ARM::VST3q32oddPseudo };
   3249       return SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
   3250     }
   3251 
   3252     case Intrinsic::arm_neon_vst4: {
   3253       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
   3254                                            ARM::VST4d16Pseudo,
   3255                                            ARM::VST4d32Pseudo,
   3256                                            ARM::VST1d64QPseudo };
   3257       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
   3258                                             ARM::VST4q16Pseudo_UPD,
   3259                                             ARM::VST4q32Pseudo_UPD };
   3260       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
   3261                                             ARM::VST4q16oddPseudo,
   3262                                             ARM::VST4q32oddPseudo };
   3263       return SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
   3264     }
   3265 
   3266     case Intrinsic::arm_neon_vst2lane: {
   3267       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
   3268                                            ARM::VST2LNd16Pseudo,
   3269                                            ARM::VST2LNd32Pseudo };
   3270       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
   3271                                            ARM::VST2LNq32Pseudo };
   3272       return SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
   3273     }
   3274 
   3275     case Intrinsic::arm_neon_vst3lane: {
   3276       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
   3277                                            ARM::VST3LNd16Pseudo,
   3278                                            ARM::VST3LNd32Pseudo };
   3279       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
   3280                                            ARM::VST3LNq32Pseudo };
   3281       return SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
   3282     }
   3283 
   3284     case Intrinsic::arm_neon_vst4lane: {
   3285       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
   3286                                            ARM::VST4LNd16Pseudo,
   3287                                            ARM::VST4LNd32Pseudo };
   3288       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
   3289                                            ARM::VST4LNq32Pseudo };
   3290       return SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
   3291     }
   3292     }
   3293     break;
   3294   }
   3295 
   3296   case ISD::INTRINSIC_WO_CHAIN: {
   3297     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
   3298     switch (IntNo) {
   3299     default:
   3300       break;
   3301 
   3302     case Intrinsic::arm_neon_vtbl2:
   3303       return SelectVTBL(N, false, 2, ARM::VTBL2);
   3304     case Intrinsic::arm_neon_vtbl3:
   3305       return SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
   3306     case Intrinsic::arm_neon_vtbl4:
   3307       return SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
   3308 
   3309     case Intrinsic::arm_neon_vtbx2:
   3310       return SelectVTBL(N, true, 2, ARM::VTBX2);
   3311     case Intrinsic::arm_neon_vtbx3:
   3312       return SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
   3313     case Intrinsic::arm_neon_vtbx4:
   3314       return SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
   3315     }
   3316     break;
   3317   }
   3318 
   3319   case ARMISD::VTBL1: {
   3320     SDLoc dl(N);
   3321     EVT VT = N->getValueType(0);
   3322     SmallVector<SDValue, 6> Ops;
   3323 
   3324     Ops.push_back(N->getOperand(0));
   3325     Ops.push_back(N->getOperand(1));
   3326     Ops.push_back(getAL(CurDAG, dl));                // Predicate
   3327     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
   3328     return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops);
   3329   }
   3330   case ARMISD::VTBL2: {
   3331     SDLoc dl(N);
   3332     EVT VT = N->getValueType(0);
   3333 
   3334     // Form a REG_SEQUENCE to force register allocation.
   3335     SDValue V0 = N->getOperand(0);
   3336     SDValue V1 = N->getOperand(1);
   3337     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
   3338 
   3339     SmallVector<SDValue, 6> Ops;
   3340     Ops.push_back(RegSeq);
   3341     Ops.push_back(N->getOperand(2));
   3342     Ops.push_back(getAL(CurDAG, dl));                // Predicate
   3343     Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // Predicate Register
   3344     return CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops);
   3345   }
   3346 
   3347   case ISD::CONCAT_VECTORS:
   3348     return SelectConcatVector(N);
   3349   }
   3350 
   3351   return SelectCode(N);
   3352 }
   3353 
   3354 // Inspect a register string of the form
   3355 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
   3356 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
   3357 // and obtain the integer operands from them, adding these operands to the
   3358 // provided vector.
   3359 static void getIntOperandsFromRegisterString(StringRef RegString,
   3360                                              SelectionDAG *CurDAG, SDLoc DL,
   3361                                              std::vector<SDValue>& Ops) {
   3362   SmallVector<StringRef, 5> Fields;
   3363   RegString.split(Fields, ':');
   3364 
   3365   if (Fields.size() > 1) {
   3366     bool AllIntFields = true;
   3367 
   3368     for (StringRef Field : Fields) {
   3369       // Need to trim out leading 'cp' characters and get the integer field.
   3370       unsigned IntField;
   3371       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
   3372       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
   3373     }
   3374 
   3375     assert(AllIntFields &&
   3376             "Unexpected non-integer value in special register string.");
   3377   }
   3378 }
   3379 
   3380 // Maps a Banked Register string to its mask value. The mask value returned is
   3381 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
   3382 // mask operand, which expresses which register is to be used, e.g. r8, and in
   3383 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
   3384 // was invalid.
   3385 static inline int getBankedRegisterMask(StringRef RegString) {
   3386   return StringSwitch<int>(RegString.lower())
   3387           .Case("r8_usr", 0x00)
   3388           .Case("r9_usr", 0x01)
   3389           .Case("r10_usr", 0x02)
   3390           .Case("r11_usr", 0x03)
   3391           .Case("r12_usr", 0x04)
   3392           .Case("sp_usr", 0x05)
   3393           .Case("lr_usr", 0x06)
   3394           .Case("r8_fiq", 0x08)
   3395           .Case("r9_fiq", 0x09)
   3396           .Case("r10_fiq", 0x0a)
   3397           .Case("r11_fiq", 0x0b)
   3398           .Case("r12_fiq", 0x0c)
   3399           .Case("sp_fiq", 0x0d)
   3400           .Case("lr_fiq", 0x0e)
   3401           .Case("lr_irq", 0x10)
   3402           .Case("sp_irq", 0x11)
   3403           .Case("lr_svc", 0x12)
   3404           .Case("sp_svc", 0x13)
   3405           .Case("lr_abt", 0x14)
   3406           .Case("sp_abt", 0x15)
   3407           .Case("lr_und", 0x16)
   3408           .Case("sp_und", 0x17)
   3409           .Case("lr_mon", 0x1c)
   3410           .Case("sp_mon", 0x1d)
   3411           .Case("elr_hyp", 0x1e)
   3412           .Case("sp_hyp", 0x1f)
   3413           .Case("spsr_fiq", 0x2e)
   3414           .Case("spsr_irq", 0x30)
   3415           .Case("spsr_svc", 0x32)
   3416           .Case("spsr_abt", 0x34)
   3417           .Case("spsr_und", 0x36)
   3418           .Case("spsr_mon", 0x3c)
   3419           .Case("spsr_hyp", 0x3e)
   3420           .Default(-1);
   3421 }
   3422 
   3423 // Maps a MClass special register string to its value for use in the
   3424 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
   3425 // Returns -1 to signify that the string was invalid.
   3426 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
   3427   return StringSwitch<int>(RegString.lower())
   3428           .Case("apsr", 0x0)
   3429           .Case("iapsr", 0x1)
   3430           .Case("eapsr", 0x2)
   3431           .Case("xpsr", 0x3)
   3432           .Case("ipsr", 0x5)
   3433           .Case("epsr", 0x6)
   3434           .Case("iepsr", 0x7)
   3435           .Case("msp", 0x8)
   3436           .Case("psp", 0x9)
   3437           .Case("primask", 0x10)
   3438           .Case("basepri", 0x11)
   3439           .Case("basepri_max", 0x12)
   3440           .Case("faultmask", 0x13)
   3441           .Case("control", 0x14)
   3442           .Default(-1);
   3443 }
   3444 
   3445 // The flags here are common to those allowed for apsr in the A class cores and
   3446 // those allowed for the special registers in the M class cores. Returns a
   3447 // value representing which flags were present, -1 if invalid.
   3448 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
   3449   if (Flags.empty())
   3450     return 0x2 | (int)hasDSP;
   3451 
   3452   return StringSwitch<int>(Flags)
   3453           .Case("g", 0x1)
   3454           .Case("nzcvq", 0x2)
   3455           .Case("nzcvqg", 0x3)
   3456           .Default(-1);
   3457 }
   3458 
   3459 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
   3460                                  const ARMSubtarget *Subtarget) {
   3461   // Ensure that the register (without flags) was a valid M Class special
   3462   // register.
   3463   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
   3464   if (SYSmvalue == -1)
   3465     return -1;
   3466 
   3467   // basepri, basepri_max and faultmask are only valid for V7m.
   3468   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
   3469     return -1;
   3470 
   3471   // If it was a read then we won't be expecting flags and so at this point
   3472   // we can return the mask.
   3473   if (IsRead) {
   3474     assert (Flags.empty() && "Unexpected flags for reading M class register.");
   3475     return SYSmvalue;
   3476   }
   3477 
   3478   // We know we are now handling a write so need to get the mask for the flags.
   3479   int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
   3480 
   3481   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
   3482   // shouldn't have flags present.
   3483   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
   3484     return -1;
   3485 
   3486   // The _g and _nzcvqg versions are only valid if the DSP extension is
   3487   // available.
   3488   if (!Subtarget->hasDSP() && (Mask & 0x1))
   3489     return -1;
   3490 
   3491   // The register was valid so need to put the mask in the correct place
   3492   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
   3493   // construct the operand for the instruction node.
   3494   if (SYSmvalue < 0x4)
   3495     return SYSmvalue | Mask << 10;
   3496 
   3497   return SYSmvalue;
   3498 }
   3499 
   3500 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
   3501   // The mask operand contains the special register (R Bit) in bit 4, whether
   3502   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
   3503   // bits 3-0 contains the fields to be accessed in the special register, set by
   3504   // the flags provided with the register.
   3505   int Mask = 0;
   3506   if (Reg == "apsr") {
   3507     // The flags permitted for apsr are the same flags that are allowed in
   3508     // M class registers. We get the flag value and then shift the flags into
   3509     // the correct place to combine with the mask.
   3510     Mask = getMClassFlagsMask(Flags, true);
   3511     if (Mask == -1)
   3512       return -1;
   3513     return Mask << 2;
   3514   }
   3515 
   3516   if (Reg != "cpsr" && Reg != "spsr") {
   3517     return -1;
   3518   }
   3519 
   3520   // This is the same as if the flags were "fc"
   3521   if (Flags.empty() || Flags == "all")
   3522     return Mask | 0x9;
   3523 
   3524   // Inspect the supplied flags string and set the bits in the mask for
   3525   // the relevant and valid flags allowed for cpsr and spsr.
   3526   for (char Flag : Flags) {
   3527     int FlagVal;
   3528     switch (Flag) {
   3529       case 'c':
   3530         FlagVal = 0x1;
   3531         break;
   3532       case 'x':
   3533         FlagVal = 0x2;
   3534         break;
   3535       case 's':
   3536         FlagVal = 0x4;
   3537         break;
   3538       case 'f':
   3539         FlagVal = 0x8;
   3540         break;
   3541       default:
   3542         FlagVal = 0;
   3543     }
   3544 
   3545     // This avoids allowing strings where the same flag bit appears twice.
   3546     if (!FlagVal || (Mask & FlagVal))
   3547       return -1;
   3548     Mask |= FlagVal;
   3549   }
   3550 
   3551   // If the register is spsr then we need to set the R bit.
   3552   if (Reg == "spsr")
   3553     Mask |= 0x10;
   3554 
   3555   return Mask;
   3556 }
   3557 
   3558 // Lower the read_register intrinsic to ARM specific DAG nodes
   3559 // using the supplied metadata string to select the instruction node to use
   3560 // and the registers/masks to construct as operands for the node.
   3561 SDNode *ARMDAGToDAGISel::SelectReadRegister(SDNode *N){
   3562   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   3563   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   3564   bool IsThumb2 = Subtarget->isThumb2();
   3565   SDLoc DL(N);
   3566 
   3567   std::vector<SDValue> Ops;
   3568   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
   3569 
   3570   if (!Ops.empty()) {
   3571     // If the special register string was constructed of fields (as defined
   3572     // in the ACLE) then need to lower to MRC node (32 bit) or
   3573     // MRRC node(64 bit), we can make the distinction based on the number of
   3574     // operands we have.
   3575     unsigned Opcode;
   3576     SmallVector<EVT, 3> ResTypes;
   3577     if (Ops.size() == 5){
   3578       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
   3579       ResTypes.append({ MVT::i32, MVT::Other });
   3580     } else {
   3581       assert(Ops.size() == 3 &&
   3582               "Invalid number of fields in special register string.");
   3583       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
   3584       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
   3585     }
   3586 
   3587     Ops.push_back(getAL(CurDAG, DL));
   3588     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   3589     Ops.push_back(N->getOperand(0));
   3590     return CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops);
   3591   }
   3592 
   3593   std::string SpecialReg = RegString->getString().lower();
   3594 
   3595   int BankedReg = getBankedRegisterMask(SpecialReg);
   3596   if (BankedReg != -1) {
   3597     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
   3598             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3599             N->getOperand(0) };
   3600     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
   3601                                   DL, MVT::i32, MVT::Other, Ops);
   3602   }
   3603 
   3604   // The VFP registers are read by creating SelectionDAG nodes with opcodes
   3605   // corresponding to the register that is being read from. So we switch on the
   3606   // string to find which opcode we need to use.
   3607   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
   3608                     .Case("fpscr", ARM::VMRS)
   3609                     .Case("fpexc", ARM::VMRS_FPEXC)
   3610                     .Case("fpsid", ARM::VMRS_FPSID)
   3611                     .Case("mvfr0", ARM::VMRS_MVFR0)
   3612                     .Case("mvfr1", ARM::VMRS_MVFR1)
   3613                     .Case("mvfr2", ARM::VMRS_MVFR2)
   3614                     .Case("fpinst", ARM::VMRS_FPINST)
   3615                     .Case("fpinst2", ARM::VMRS_FPINST2)
   3616                     .Default(0);
   3617 
   3618   // If an opcode was found then we can lower the read to a VFP instruction.
   3619   if (Opcode) {
   3620     if (!Subtarget->hasVFP2())
   3621       return nullptr;
   3622     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
   3623       return nullptr;
   3624 
   3625     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3626             N->getOperand(0) };
   3627     return CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops);
   3628   }
   3629 
   3630   // If the target is M Class then need to validate that the register string
   3631   // is an acceptable value, so check that a mask can be constructed from the
   3632   // string.
   3633   if (Subtarget->isMClass()) {
   3634     int SYSmValue = getMClassRegisterMask(SpecialReg, "", true, Subtarget);
   3635     if (SYSmValue == -1)
   3636       return nullptr;
   3637 
   3638     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
   3639                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3640                       N->getOperand(0) };
   3641     return CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops);
   3642   }
   3643 
   3644   // Here we know the target is not M Class so we need to check if it is one
   3645   // of the remaining possible values which are apsr, cpsr or spsr.
   3646   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
   3647     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3648             N->getOperand(0) };
   3649     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS, DL,
   3650                                   MVT::i32, MVT::Other, Ops);
   3651   }
   3652 
   3653   if (SpecialReg == "spsr") {
   3654     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3655             N->getOperand(0) };
   3656     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys,
   3657                                   DL, MVT::i32, MVT::Other, Ops);
   3658   }
   3659 
   3660   return nullptr;
   3661 }
   3662 
   3663 // Lower the write_register intrinsic to ARM specific DAG nodes
   3664 // using the supplied metadata string to select the instruction node to use
   3665 // and the registers/masks to use in the nodes
   3666 SDNode *ARMDAGToDAGISel::SelectWriteRegister(SDNode *N){
   3667   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   3668   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   3669   bool IsThumb2 = Subtarget->isThumb2();
   3670   SDLoc DL(N);
   3671 
   3672   std::vector<SDValue> Ops;
   3673   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
   3674 
   3675   if (!Ops.empty()) {
   3676     // If the special register string was constructed of fields (as defined
   3677     // in the ACLE) then need to lower to MCR node (32 bit) or
   3678     // MCRR node(64 bit), we can make the distinction based on the number of
   3679     // operands we have.
   3680     unsigned Opcode;
   3681     if (Ops.size() == 5) {
   3682       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
   3683       Ops.insert(Ops.begin()+2, N->getOperand(2));
   3684     } else {
   3685       assert(Ops.size() == 3 &&
   3686               "Invalid number of fields in special register string.");
   3687       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
   3688       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
   3689       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
   3690     }
   3691 
   3692     Ops.push_back(getAL(CurDAG, DL));
   3693     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   3694     Ops.push_back(N->getOperand(0));
   3695 
   3696     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
   3697   }
   3698 
   3699   std::string SpecialReg = RegString->getString().lower();
   3700   int BankedReg = getBankedRegisterMask(SpecialReg);
   3701   if (BankedReg != -1) {
   3702     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
   3703             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3704             N->getOperand(0) };
   3705     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
   3706                                   DL, MVT::Other, Ops);
   3707   }
   3708 
   3709   // The VFP registers are written to by creating SelectionDAG nodes with
   3710   // opcodes corresponding to the register that is being written. So we switch
   3711   // on the string to find which opcode we need to use.
   3712   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
   3713                     .Case("fpscr", ARM::VMSR)
   3714                     .Case("fpexc", ARM::VMSR_FPEXC)
   3715                     .Case("fpsid", ARM::VMSR_FPSID)
   3716                     .Case("fpinst", ARM::VMSR_FPINST)
   3717                     .Case("fpinst2", ARM::VMSR_FPINST2)
   3718                     .Default(0);
   3719 
   3720   if (Opcode) {
   3721     if (!Subtarget->hasVFP2())
   3722       return nullptr;
   3723     Ops = { N->getOperand(2), getAL(CurDAG, DL),
   3724             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
   3725     return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
   3726   }
   3727 
   3728   SmallVector<StringRef, 5> Fields;
   3729   StringRef(SpecialReg).split(Fields, '_', 1, false);
   3730   std::string Reg = Fields[0].str();
   3731   StringRef Flags = Fields.size() == 2 ? Fields[1] : "";
   3732 
   3733   // If the target was M Class then need to validate the special register value
   3734   // and retrieve the mask for use in the instruction node.
   3735   if (Subtarget->isMClass()) {
   3736     // basepri_max gets split so need to correct Reg and Flags.
   3737     if (SpecialReg == "basepri_max") {
   3738       Reg = SpecialReg;
   3739       Flags = "";
   3740     }
   3741     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
   3742     if (SYSmValue == -1)
   3743       return nullptr;
   3744 
   3745     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
   3746                       N->getOperand(2), getAL(CurDAG, DL),
   3747                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
   3748     return CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops);
   3749   }
   3750 
   3751   // We then check to see if a valid mask can be constructed for one of the
   3752   // register string values permitted for the A and R class cores. These values
   3753   // are apsr, spsr and cpsr; these are also valid on older cores.
   3754   int Mask = getARClassRegisterMask(Reg, Flags);
   3755   if (Mask != -1) {
   3756     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
   3757             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3758             N->getOperand(0) };
   3759     return CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
   3760                                   DL, MVT::Other, Ops);
   3761   }
   3762 
   3763   return nullptr;
   3764 }
   3765 
   3766 SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){
   3767   std::vector<SDValue> AsmNodeOperands;
   3768   unsigned Flag, Kind;
   3769   bool Changed = false;
   3770   unsigned NumOps = N->getNumOperands();
   3771 
   3772   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
   3773   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
   3774   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
   3775   // respectively. Since there is no constraint to explicitly specify a
   3776   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
   3777   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
   3778   // them into a GPRPair.
   3779 
   3780   SDLoc dl(N);
   3781   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
   3782                                    : SDValue(nullptr,0);
   3783 
   3784   SmallVector<bool, 8> OpChanged;
   3785   // Glue node will be appended late.
   3786   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
   3787     SDValue op = N->getOperand(i);
   3788     AsmNodeOperands.push_back(op);
   3789 
   3790     if (i < InlineAsm::Op_FirstOperand)
   3791       continue;
   3792 
   3793     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
   3794       Flag = C->getZExtValue();
   3795       Kind = InlineAsm::getKind(Flag);
   3796     }
   3797     else
   3798       continue;
   3799 
   3800     // Immediate operands to inline asm in the SelectionDAG are modeled with
   3801     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
   3802     // the second is a constant with the value of the immediate. If we get here
   3803     // and we have a Kind_Imm, skip the next operand, and continue.
   3804     if (Kind == InlineAsm::Kind_Imm) {
   3805       SDValue op = N->getOperand(++i);
   3806       AsmNodeOperands.push_back(op);
   3807       continue;
   3808     }
   3809 
   3810     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
   3811     if (NumRegs)
   3812       OpChanged.push_back(false);
   3813 
   3814     unsigned DefIdx = 0;
   3815     bool IsTiedToChangedOp = false;
   3816     // If it's a use that is tied with a previous def, it has no
   3817     // reg class constraint.
   3818     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
   3819       IsTiedToChangedOp = OpChanged[DefIdx];
   3820 
   3821     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
   3822         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
   3823       continue;
   3824 
   3825     unsigned RC;
   3826     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
   3827     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
   3828         || NumRegs != 2)
   3829       continue;
   3830 
   3831     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
   3832     SDValue V0 = N->getOperand(i+1);
   3833     SDValue V1 = N->getOperand(i+2);
   3834     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
   3835     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
   3836     SDValue PairedReg;
   3837     MachineRegisterInfo &MRI = MF->getRegInfo();
   3838 
   3839     if (Kind == InlineAsm::Kind_RegDef ||
   3840         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
   3841       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
   3842       // the original GPRs.
   3843 
   3844       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   3845       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
   3846       SDValue Chain = SDValue(N,0);
   3847 
   3848       SDNode *GU = N->getGluedUser();
   3849       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
   3850                                                Chain.getValue(1));
   3851 
   3852       // Extract values from a GPRPair reg and copy to the original GPR reg.
   3853       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
   3854                                                     RegCopy);
   3855       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
   3856                                                     RegCopy);
   3857       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
   3858                                         RegCopy.getValue(1));
   3859       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
   3860 
   3861       // Update the original glue user.
   3862       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
   3863       Ops.push_back(T1.getValue(1));
   3864       CurDAG->UpdateNodeOperands(GU, Ops);
   3865     }
   3866     else {
   3867       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
   3868       // GPRPair and then pass the GPRPair to the inline asm.
   3869       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
   3870 
   3871       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
   3872       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
   3873                                           Chain.getValue(1));
   3874       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
   3875                                           T0.getValue(1));
   3876       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
   3877 
   3878       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
   3879       // i32 VRs of inline asm with it.
   3880       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   3881       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
   3882       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
   3883 
   3884       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
   3885       Glue = Chain.getValue(1);
   3886     }
   3887 
   3888     Changed = true;
   3889 
   3890     if(PairedReg.getNode()) {
   3891       OpChanged[OpChanged.size() -1 ] = true;
   3892       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
   3893       if (IsTiedToChangedOp)
   3894         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
   3895       else
   3896         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
   3897       // Replace the current flag.
   3898       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
   3899           Flag, dl, MVT::i32);
   3900       // Add the new register node and skip the original two GPRs.
   3901       AsmNodeOperands.push_back(PairedReg);
   3902       // Skip the next two GPRs.
   3903       i += 2;
   3904     }
   3905   }
   3906 
   3907   if (Glue.getNode())
   3908     AsmNodeOperands.push_back(Glue);
   3909   if (!Changed)
   3910     return nullptr;
   3911 
   3912   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
   3913       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
   3914   New->setNodeId(-1);
   3915   return New.getNode();
   3916 }
   3917 
   3918 
   3919 bool ARMDAGToDAGISel::
   3920 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
   3921                              std::vector<SDValue> &OutOps) {
   3922   switch(ConstraintID) {
   3923   default:
   3924     llvm_unreachable("Unexpected asm memory constraint");
   3925   case InlineAsm::Constraint_i:
   3926     // FIXME: It seems strange that 'i' is needed here since it's supposed to
   3927     //        be an immediate and not a memory constraint.
   3928     // Fallthrough.
   3929   case InlineAsm::Constraint_m:
   3930   case InlineAsm::Constraint_o:
   3931   case InlineAsm::Constraint_Q:
   3932   case InlineAsm::Constraint_Um:
   3933   case InlineAsm::Constraint_Un:
   3934   case InlineAsm::Constraint_Uq:
   3935   case InlineAsm::Constraint_Us:
   3936   case InlineAsm::Constraint_Ut:
   3937   case InlineAsm::Constraint_Uv:
   3938   case InlineAsm::Constraint_Uy:
   3939     // Require the address to be in a register.  That is safe for all ARM
   3940     // variants and it is hard to do anything much smarter without knowing
   3941     // how the operand is used.
   3942     OutOps.push_back(Op);
   3943     return false;
   3944   }
   3945   return true;
   3946 }
   3947 
   3948 /// createARMISelDag - This pass converts a legalized DAG into a
   3949 /// ARM-specific DAG, ready for instruction scheduling.
   3950 ///
   3951 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
   3952                                      CodeGenOpt::Level OptLevel) {
   3953   return new ARMDAGToDAGISel(TM, OptLevel);
   3954 }
   3955