Home | History | Annotate | Download | only in ARM
      1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines an instruction selector for the ARM target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "ARM.h"
     15 #include "ARMBaseInstrInfo.h"
     16 #include "ARMTargetMachine.h"
     17 #include "MCTargetDesc/ARMAddressingModes.h"
     18 #include "llvm/ADT/StringSwitch.h"
     19 #include "llvm/CodeGen/MachineFrameInfo.h"
     20 #include "llvm/CodeGen/MachineFunction.h"
     21 #include "llvm/CodeGen/MachineInstrBuilder.h"
     22 #include "llvm/CodeGen/MachineRegisterInfo.h"
     23 #include "llvm/CodeGen/SelectionDAG.h"
     24 #include "llvm/CodeGen/SelectionDAGISel.h"
     25 #include "llvm/IR/CallingConv.h"
     26 #include "llvm/IR/Constants.h"
     27 #include "llvm/IR/DerivedTypes.h"
     28 #include "llvm/IR/Function.h"
     29 #include "llvm/IR/Intrinsics.h"
     30 #include "llvm/IR/LLVMContext.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Debug.h"
     33 #include "llvm/Support/ErrorHandling.h"
     34 #include "llvm/Target/TargetLowering.h"
     35 #include "llvm/Target/TargetOptions.h"
     36 
     37 using namespace llvm;
     38 
     39 #define DEBUG_TYPE "arm-isel"
     40 
     41 static cl::opt<bool>
     42 DisableShifterOp("disable-shifter-op", cl::Hidden,
     43   cl::desc("Disable isel of shifter-op"),
     44   cl::init(false));
     45 
     46 //===--------------------------------------------------------------------===//
     47 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
     48 /// instructions for SelectionDAG operations.
     49 ///
     50 namespace {
     51 
     52 enum AddrMode2Type {
     53   AM2_BASE, // Simple AM2 (+-imm12)
     54   AM2_SHOP  // Shifter-op AM2
     55 };
     56 
     57 class ARMDAGToDAGISel : public SelectionDAGISel {
     58   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
     59   /// make the right decision when generating code for different targets.
     60   const ARMSubtarget *Subtarget;
     61 
     62 public:
     63   explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel)
     64       : SelectionDAGISel(tm, OptLevel) {}
     65 
     66   bool runOnMachineFunction(MachineFunction &MF) override {
     67     // Reset the subtarget each time through.
     68     Subtarget = &MF.getSubtarget<ARMSubtarget>();
     69     SelectionDAGISel::runOnMachineFunction(MF);
     70     return true;
     71   }
     72 
     73   const char *getPassName() const override {
     74     return "ARM Instruction Selection";
     75   }
     76 
     77   void PreprocessISelDAG() override;
     78 
     79   /// getI32Imm - Return a target constant of type i32 with the specified
     80   /// value.
     81   inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
     82     return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
     83   }
     84 
     85   void Select(SDNode *N) override;
     86 
     87   bool hasNoVMLxHazardUse(SDNode *N) const;
     88   bool isShifterOpProfitable(const SDValue &Shift,
     89                              ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
     90   bool SelectRegShifterOperand(SDValue N, SDValue &A,
     91                                SDValue &B, SDValue &C,
     92                                bool CheckProfitability = true);
     93   bool SelectImmShifterOperand(SDValue N, SDValue &A,
     94                                SDValue &B, bool CheckProfitability = true);
     95   bool SelectShiftRegShifterOperand(SDValue N, SDValue &A,
     96                                     SDValue &B, SDValue &C) {
     97     // Don't apply the profitability check
     98     return SelectRegShifterOperand(N, A, B, C, false);
     99   }
    100   bool SelectShiftImmShifterOperand(SDValue N, SDValue &A,
    101                                     SDValue &B) {
    102     // Don't apply the profitability check
    103     return SelectImmShifterOperand(N, A, B, false);
    104   }
    105 
    106   bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
    107   bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);
    108 
    109   AddrMode2Type SelectAddrMode2Worker(SDValue N, SDValue &Base,
    110                                       SDValue &Offset, SDValue &Opc);
    111   bool SelectAddrMode2Base(SDValue N, SDValue &Base, SDValue &Offset,
    112                            SDValue &Opc) {
    113     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_BASE;
    114   }
    115 
    116   bool SelectAddrMode2ShOp(SDValue N, SDValue &Base, SDValue &Offset,
    117                            SDValue &Opc) {
    118     return SelectAddrMode2Worker(N, Base, Offset, Opc) == AM2_SHOP;
    119   }
    120 
    121   bool SelectAddrMode2(SDValue N, SDValue &Base, SDValue &Offset,
    122                        SDValue &Opc) {
    123     SelectAddrMode2Worker(N, Base, Offset, Opc);
    124 //    return SelectAddrMode2ShOp(N, Base, Offset, Opc);
    125     // This always matches one way or another.
    126     return true;
    127   }
    128 
    129   bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {
    130     const ConstantSDNode *CN = cast<ConstantSDNode>(N);
    131     Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);
    132     Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);
    133     return true;
    134   }
    135 
    136   bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
    137                              SDValue &Offset, SDValue &Opc);
    138   bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
    139                              SDValue &Offset, SDValue &Opc);
    140   bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
    141                              SDValue &Offset, SDValue &Opc);
    142   bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
    143   bool SelectAddrMode3(SDValue N, SDValue &Base,
    144                        SDValue &Offset, SDValue &Opc);
    145   bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
    146                              SDValue &Offset, SDValue &Opc);
    147   bool SelectAddrMode5(SDValue N, SDValue &Base,
    148                        SDValue &Offset);
    149   bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
    150   bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);
    151 
    152   bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);
    153 
    154   // Thumb Addressing Modes:
    155   bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
    156   bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
    157                                 SDValue &OffImm);
    158   bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
    159                                  SDValue &OffImm);
    160   bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
    161                                  SDValue &OffImm);
    162   bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
    163                                  SDValue &OffImm);
    164   bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
    165 
    166   // Thumb 2 Addressing Modes:
    167   bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
    168   bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
    169                             SDValue &OffImm);
    170   bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
    171                                  SDValue &OffImm);
    172   bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
    173                              SDValue &OffReg, SDValue &ShImm);
    174   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
    175 
    176   inline bool is_so_imm(unsigned Imm) const {
    177     return ARM_AM::getSOImmVal(Imm) != -1;
    178   }
    179 
    180   inline bool is_so_imm_not(unsigned Imm) const {
    181     return ARM_AM::getSOImmVal(~Imm) != -1;
    182   }
    183 
    184   inline bool is_t2_so_imm(unsigned Imm) const {
    185     return ARM_AM::getT2SOImmVal(Imm) != -1;
    186   }
    187 
    188   inline bool is_t2_so_imm_not(unsigned Imm) const {
    189     return ARM_AM::getT2SOImmVal(~Imm) != -1;
    190   }
    191 
    192   // Include the pieces autogenerated from the target description.
    193 #include "ARMGenDAGISel.inc"
    194 
    195 private:
    196   /// Indexed (pre/post inc/dec) load matching code for ARM.
    197   bool tryARMIndexedLoad(SDNode *N);
    198   bool tryT2IndexedLoad(SDNode *N);
    199 
    200   /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
    201   /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
    202   /// loads of D registers and even subregs and odd subregs of Q registers.
    203   /// For NumVecs <= 2, QOpcodes1 is not used.
    204   void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
    205                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
    206                  const uint16_t *QOpcodes1);
    207 
    208   /// SelectVST - Select NEON store intrinsics.  NumVecs should
    209   /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
    210   /// stores of D registers and even subregs and odd subregs of Q registers.
    211   /// For NumVecs <= 2, QOpcodes1 is not used.
    212   void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
    213                  const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
    214                  const uint16_t *QOpcodes1);
    215 
    216   /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
    217   /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
    218   /// load/store of D registers and Q registers.
    219   void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
    220                        unsigned NumVecs, const uint16_t *DOpcodes,
    221                        const uint16_t *QOpcodes);
    222 
    223   /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
    224   /// should be 2, 3 or 4.  The opcode array specifies the instructions used
    225   /// for loading D registers.  (Q registers are not supported.)
    226   void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
    227                     const uint16_t *Opcodes);
    228 
    229   /// SelectVTBL - Select NEON VTBL and VTBX intrinsics.  NumVecs should be 2,
    230   /// 3 or 4.  These are custom-selected so that a REG_SEQUENCE can be
    231   /// generated to force the table registers to be consecutive.
    232   void SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc);
    233 
    234   /// Try to select SBFX/UBFX instructions for ARM.
    235   bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);
    236 
    237   // Select special operations if node forms integer ABS pattern
    238   bool tryABSOp(SDNode *N);
    239 
    240   bool tryReadRegister(SDNode *N);
    241   bool tryWriteRegister(SDNode *N);
    242 
    243   bool tryInlineAsm(SDNode *N);
    244 
    245   void SelectConcatVector(SDNode *N);
    246 
    247   bool trySMLAWSMULW(SDNode *N);
    248 
    249   void SelectCMP_SWAP(SDNode *N);
    250 
    251   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    252   /// inline asm expressions.
    253   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
    254                                     std::vector<SDValue> &OutOps) override;
    255 
    256   // Form pairs of consecutive R, S, D, or Q registers.
    257   SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
    258   SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
    259   SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
    260   SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);
    261 
    262   // Form sequences of 4 consecutive S, D, or Q registers.
    263   SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
    264   SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
    265   SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
    266 
    267   // Get the alignment operand for a NEON VLD or VST instruction.
    268   SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
    269                         bool is64BitVector);
    270 
    271   /// Returns the number of instructions required to materialize the given
    272   /// constant in a register, or 3 if a literal pool load is needed.
    273   unsigned ConstantMaterializationCost(unsigned Val) const;
    274 
    275   /// Checks if N is a multiplication by a constant where we can extract out a
    276   /// power of two from the constant so that it can be used in a shift, but only
    277   /// if it simplifies the materialization of the constant. Returns true if it
    278   /// is, and assigns to PowerOfTwo the power of two that should be extracted
    279   /// out and to NewMulConst the new constant to be multiplied by.
    280   bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
    281                               unsigned &PowerOfTwo, SDValue &NewMulConst) const;
    282 
    283   /// Replace N with M in CurDAG, in a way that also ensures that M gets
    284   /// selected when N would have been selected.
    285   void replaceDAGValue(const SDValue &N, SDValue M);
    286 };
    287 }
    288 
    289 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
    290 /// operand. If so Imm will receive the 32-bit value.
    291 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
    292   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
    293     Imm = cast<ConstantSDNode>(N)->getZExtValue();
    294     return true;
    295   }
    296   return false;
    297 }
    298 
    299 // isInt32Immediate - This method tests to see if a constant operand.
    300 // If so Imm will receive the 32 bit value.
    301 static bool isInt32Immediate(SDValue N, unsigned &Imm) {
    302   return isInt32Immediate(N.getNode(), Imm);
    303 }
    304 
    305 // isOpcWithIntImmediate - This method tests to see if the node is a specific
    306 // opcode and that it has a immediate integer right operand.
    307 // If so Imm will receive the 32 bit value.
    308 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
    309   return N->getOpcode() == Opc &&
    310          isInt32Immediate(N->getOperand(1).getNode(), Imm);
    311 }
    312 
    313 /// \brief Check whether a particular node is a constant value representable as
    314 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
    315 ///
    316 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
    317 static bool isScaledConstantInRange(SDValue Node, int Scale,
    318                                     int RangeMin, int RangeMax,
    319                                     int &ScaledConstant) {
    320   assert(Scale > 0 && "Invalid scale!");
    321 
    322   // Check that this is a constant.
    323   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
    324   if (!C)
    325     return false;
    326 
    327   ScaledConstant = (int) C->getZExtValue();
    328   if ((ScaledConstant % Scale) != 0)
    329     return false;
    330 
    331   ScaledConstant /= Scale;
    332   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
    333 }
    334 
    335 void ARMDAGToDAGISel::PreprocessISelDAG() {
    336   if (!Subtarget->hasV6T2Ops())
    337     return;
    338 
    339   bool isThumb2 = Subtarget->isThumb();
    340   for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
    341        E = CurDAG->allnodes_end(); I != E; ) {
    342     SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.
    343 
    344     if (N->getOpcode() != ISD::ADD)
    345       continue;
    346 
    347     // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    348     // leading zeros, followed by consecutive set bits, followed by 1 or 2
    349     // trailing zeros, e.g. 1020.
    350     // Transform the expression to
    351     // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    352     // of trailing zeros of c2. The left shift would be folded as an shifter
    353     // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    354     // node (UBFX).
    355 
    356     SDValue N0 = N->getOperand(0);
    357     SDValue N1 = N->getOperand(1);
    358     unsigned And_imm = 0;
    359     if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
    360       if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
    361         std::swap(N0, N1);
    362     }
    363     if (!And_imm)
    364       continue;
    365 
    366     // Check if the AND mask is an immediate of the form: 000.....1111111100
    367     unsigned TZ = countTrailingZeros(And_imm);
    368     if (TZ != 1 && TZ != 2)
    369       // Be conservative here. Shifter operands aren't always free. e.g. On
    370       // Swift, left shifter operand of 1 / 2 for free but others are not.
    371       // e.g.
    372       //  ubfx   r3, r1, #16, #8
    373       //  ldr.w  r3, [r0, r3, lsl #2]
    374       // vs.
    375       //  mov.w  r9, #1020
    376       //  and.w  r2, r9, r1, lsr #14
    377       //  ldr    r2, [r0, r2]
    378       continue;
    379     And_imm >>= TZ;
    380     if (And_imm & (And_imm + 1))
    381       continue;
    382 
    383     // Look for (and (srl X, c1), c2).
    384     SDValue Srl = N1.getOperand(0);
    385     unsigned Srl_imm = 0;
    386     if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
    387         (Srl_imm <= 2))
    388       continue;
    389 
    390     // Make sure first operand is not a shifter operand which would prevent
    391     // folding of the left shift.
    392     SDValue CPTmp0;
    393     SDValue CPTmp1;
    394     SDValue CPTmp2;
    395     if (isThumb2) {
    396       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
    397         continue;
    398     } else {
    399       if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
    400           SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
    401         continue;
    402     }
    403 
    404     // Now make the transformation.
    405     Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
    406                           Srl.getOperand(0),
    407                           CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
    408                                               MVT::i32));
    409     N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
    410                          Srl,
    411                          CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    412     N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
    413                          N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    414     CurDAG->UpdateNodeOperands(N, N0, N1);
    415   }
    416 }
    417 
    418 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
    419 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
    420 /// least on current ARM implementations) which should be avoidded.
    421 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
    422   if (OptLevel == CodeGenOpt::None)
    423     return true;
    424 
    425   if (!Subtarget->hasVMLxHazards())
    426     return true;
    427 
    428   if (!N->hasOneUse())
    429     return false;
    430 
    431   SDNode *Use = *N->use_begin();
    432   if (Use->getOpcode() == ISD::CopyToReg)
    433     return true;
    434   if (Use->isMachineOpcode()) {
    435     const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
    436         CurDAG->getSubtarget().getInstrInfo());
    437 
    438     const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
    439     if (MCID.mayStore())
    440       return true;
    441     unsigned Opcode = MCID.getOpcode();
    442     if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
    443       return true;
    444     // vmlx feeding into another vmlx. We actually want to unfold
    445     // the use later in the MLxExpansion pass. e.g.
    446     // vmla
    447     // vmla (stall 8 cycles)
    448     //
    449     // vmul (5 cycles)
    450     // vadd (5 cycles)
    451     // vmla
    452     // This adds up to about 18 - 19 cycles.
    453     //
    454     // vmla
    455     // vmul (stall 4 cycles)
    456     // vadd adds up to about 14 cycles.
    457     return TII->isFpMLxInstruction(Opcode);
    458   }
    459 
    460   return false;
    461 }
    462 
    463 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
    464                                             ARM_AM::ShiftOpc ShOpcVal,
    465                                             unsigned ShAmt) {
    466   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
    467     return true;
    468   if (Shift.hasOneUse())
    469     return true;
    470   // R << 2 is free.
    471   return ShOpcVal == ARM_AM::lsl &&
    472          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
    473 }
    474 
    475 unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
    476   if (Subtarget->isThumb()) {
    477     if (Val <= 255) return 1;                               // MOV
    478     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    479     if (Val <= 510) return 2;                               // MOV + ADDi8
    480     if (~Val <= 255) return 2;                              // MOV + MVN
    481     if (ARM_AM::isThumbImmShiftedVal(Val)) return 2;        // MOV + LSL
    482   } else {
    483     if (ARM_AM::getSOImmVal(Val) != -1) return 1;           // MOV
    484     if (ARM_AM::getSOImmVal(~Val) != -1) return 1;          // MVN
    485     if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
    486     if (ARM_AM::isSOImmTwoPartVal(Val)) return 2;           // two instrs
    487   }
    488   if (Subtarget->useMovt(*MF)) return 2; // MOVW + MOVT
    489   return 3; // Literal pool load
    490 }
    491 
    492 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
    493                                              unsigned MaxShift,
    494                                              unsigned &PowerOfTwo,
    495                                              SDValue &NewMulConst) const {
    496   assert(N.getOpcode() == ISD::MUL);
    497   assert(MaxShift > 0);
    498 
    499   // If the multiply is used in more than one place then changing the constant
    500   // will make other uses incorrect, so don't.
    501   if (!N.hasOneUse()) return false;
    502   // Check if the multiply is by a constant
    503   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
    504   if (!MulConst) return false;
    505   // If the constant is used in more than one place then modifying it will mean
    506   // we need to materialize two constants instead of one, which is a bad idea.
    507   if (!MulConst->hasOneUse()) return false;
    508   unsigned MulConstVal = MulConst->getZExtValue();
    509   if (MulConstVal == 0) return false;
    510 
    511   // Find the largest power of 2 that MulConstVal is a multiple of
    512   PowerOfTwo = MaxShift;
    513   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
    514     --PowerOfTwo;
    515     if (PowerOfTwo == 0) return false;
    516   }
    517 
    518   // Only optimise if the new cost is better
    519   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
    520   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
    521   unsigned OldCost = ConstantMaterializationCost(MulConstVal);
    522   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal);
    523   return NewCost < OldCost;
    524 }
    525 
    526 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
    527   CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
    528   CurDAG->ReplaceAllUsesWith(N, M);
    529 }
    530 
    531 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
    532                                               SDValue &BaseReg,
    533                                               SDValue &Opc,
    534                                               bool CheckProfitability) {
    535   if (DisableShifterOp)
    536     return false;
    537 
    538   // If N is a multiply-by-constant and it's profitable to extract a shift and
    539   // use it in a shifted operand do so.
    540   if (N.getOpcode() == ISD::MUL) {
    541     unsigned PowerOfTwo = 0;
    542     SDValue NewMulConst;
    543     if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
    544       HandleSDNode Handle(N);
    545       replaceDAGValue(N.getOperand(1), NewMulConst);
    546       BaseReg = Handle.getValue();
    547       Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl,
    548                                                           PowerOfTwo),
    549                                       SDLoc(N), MVT::i32);
    550       return true;
    551     }
    552   }
    553 
    554   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
    555 
    556   // Don't match base register only case. That is matched to a separate
    557   // lower complexity pattern with explicit register operand.
    558   if (ShOpcVal == ARM_AM::no_shift) return false;
    559 
    560   BaseReg = N.getOperand(0);
    561   unsigned ShImmVal = 0;
    562   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
    563   if (!RHS) return false;
    564   ShImmVal = RHS->getZExtValue() & 31;
    565   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
    566                                   SDLoc(N), MVT::i32);
    567   return true;
    568 }
    569 
    570 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
    571                                               SDValue &BaseReg,
    572                                               SDValue &ShReg,
    573                                               SDValue &Opc,
    574                                               bool CheckProfitability) {
    575   if (DisableShifterOp)
    576     return false;
    577 
    578   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
    579 
    580   // Don't match base register only case. That is matched to a separate
    581   // lower complexity pattern with explicit register operand.
    582   if (ShOpcVal == ARM_AM::no_shift) return false;
    583 
    584   BaseReg = N.getOperand(0);
    585   unsigned ShImmVal = 0;
    586   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
    587   if (RHS) return false;
    588 
    589   ShReg = N.getOperand(1);
    590   if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    591     return false;
    592   Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
    593                                   SDLoc(N), MVT::i32);
    594   return true;
    595 }
    596 
    597 
    598 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
    599                                           SDValue &Base,
    600                                           SDValue &OffImm) {
    601   // Match simple R + imm12 operands.
    602 
    603   // Base only.
    604   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
    605       !CurDAG->isBaseWithConstantOffset(N)) {
    606     if (N.getOpcode() == ISD::FrameIndex) {
    607       // Match frame index.
    608       int FI = cast<FrameIndexSDNode>(N)->getIndex();
    609       Base = CurDAG->getTargetFrameIndex(
    610           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    611       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    612       return true;
    613     }
    614 
    615     if (N.getOpcode() == ARMISD::Wrapper &&
    616         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
    617         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
    618         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
    619       Base = N.getOperand(0);
    620     } else
    621       Base = N;
    622     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    623     return true;
    624   }
    625 
    626   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    627     int RHSC = (int)RHS->getSExtValue();
    628     if (N.getOpcode() == ISD::SUB)
    629       RHSC = -RHSC;
    630 
    631     if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
    632       Base   = N.getOperand(0);
    633       if (Base.getOpcode() == ISD::FrameIndex) {
    634         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    635         Base = CurDAG->getTargetFrameIndex(
    636             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    637       }
    638       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
    639       return true;
    640     }
    641   }
    642 
    643   // Base only.
    644   Base = N;
    645   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    646   return true;
    647 }
    648 
    649 
    650 
    651 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
    652                                       SDValue &Opc) {
          // Tries to match N as a register-offset load/store address of the form
          // Base +/- (Offset, optionally shifted by a constant). On success Base and
          // Offset receive the two registers and Opc receives the AM2 opcode built by
          // ARM_AM::getAM2Opc from the add/sub direction, shift amount and shift kind.
          // Returns false when N is not add/sub-like, or when it is a plain
          // R + constant address that passes the range check below.

          // Special case: the address itself is a multiply by an odd constant. This
          // is only attempted when the subtarget is neither A9-like nor Swift, or
          // when the multiply has a single use.
    653   if (N.getOpcode() == ISD::MUL &&
    654       ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    655     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    656       // X * [3,5,9] -> X + X * [2,4,8] etc.
    657       int RHSC = (int)RHS->getZExtValue();
    658       if (RHSC & 1) {
                // Clear the low bit; the remaining "1 * X" term is supplied by using
                // X as the base register as well as the offset register.
    659         RHSC = RHSC & ~1;
    660         ARM_AM::AddrOpc AddSub = ARM_AM::add;
    661         if (RHSC < 0) {
                  // Negative remainder: subtract the shifted term instead, e.g.
                  // X * -3 becomes X - (X << 2).
    662           AddSub = ARM_AM::sub;
    663           RHSC = - RHSC;
    664         }
    665         if (isPowerOf2_32(RHSC)) {
    666           unsigned ShAmt = Log2_32(RHSC);
    667           Base = Offset = N.getOperand(0);
    668           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
    669                                                             ARM_AM::lsl),
    670                                           SDLoc(N), MVT::i32);
    671           return true;
    672         }
    673       }
    674     }
    675   }
    676
    677   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
    678       // ISD::OR that is equivalent to an ISD::ADD.
    679       !CurDAG->isBaseWithConstantOffset(N))
    680     return false;
    681
    682   // Leave simple R +/- imm12 operands for LDRi12
    683   if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
            // RHSC is only an out-parameter here; its value is not used afterwards.
    684     int RHSC;
    685     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
    686                                 -0x1000+1, 0x1000, RHSC)) // 12 bits.
    687       return false;
    688   }
    689
    690   // Otherwise this is R +/- [possibly shifted] R.
    691   ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
    692   ARM_AM::ShiftOpc ShOpcVal =
    693     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
    694   unsigned ShAmt = 0;
    695
          // Default assignment; either may be replaced by the shift folding below.
    696   Base   = N.getOperand(0);
    697   Offset = N.getOperand(1);
    698
    699   if (ShOpcVal != ARM_AM::no_shift) {
    700     // Check to see if the RHS of the shift is a constant, if not, we can't fold
    701     // it.
    702     if (ConstantSDNode *Sh =
    703            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
    704       ShAmt = Sh->getZExtValue();
    705       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
                // Fold the shift: the offset register becomes the shift's input.
    706         Offset = N.getOperand(1).getOperand(0);
    707       else {
    708         ShAmt = 0;
    709         ShOpcVal = ARM_AM::no_shift;
    710       }
    711     } else {
    712       ShOpcVal = ARM_AM::no_shift;
    713     }
    714   }
    715
    716   // Try matching (R shl C) + (R).
          // Only attempted for non-SUB nodes when no shift was folded on the RHS, and
          // skipped on A9-like/Swift subtargets or when operand 0 has a single use.
    717   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
    718       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
    719         N.getOperand(0).hasOneUse())) {
    720     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    721     if (ShOpcVal != ARM_AM::no_shift) {
    722       // Check to see if the RHS of the shift is a constant, if not, we can't
    723       // fold it.
    724       if (ConstantSDNode *Sh =
    725           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
    726         ShAmt = Sh->getZExtValue();
    727         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
                  // Swap roles: the shifted LHS becomes the offset and the RHS the
                  // base.
    728           Offset = N.getOperand(0).getOperand(0);
    729           Base = N.getOperand(1);
    730         } else {
    731           ShAmt = 0;
    732           ShOpcVal = ARM_AM::no_shift;
    733         }
    734       } else {
    735         ShOpcVal = ARM_AM::no_shift;
    736       }
    737     }
    738   }
    739
    740   // If Offset is a multiply-by-constant and it's profitable to extract a shift
    741   // and use it in a shifted operand do so.
    742   if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    743     unsigned PowerOfTwo = 0;
    744     SDValue NewMulConst;
            // canExtractShiftFromMul is defined elsewhere in this file; presumably 31
            // is the largest shift amount it may extract - TODO confirm.
    745     if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
              // Rewrites the multiply's constant operand in the DAG, then applies the
              // extracted power of two as an lsl on the offset register.
    746       replaceDAGValue(Offset.getOperand(1), NewMulConst);
    747       ShAmt = PowerOfTwo;
    748       ShOpcVal = ARM_AM::lsl;
    749     }
    750   }
    751
    752   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
    753                                   SDLoc(N), MVT::i32);
    754   return true;
    755 }
    756 
    757 
    758 //-----
    759 
    760 AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
    761                                                      SDValue &Base,
    762                                                      SDValue &Offset,
    763                                                      SDValue &Opc) {
          // Classifies N as an addrmode2 address and fills Base, Offset and Opc.
          // Returns AM2_BASE when the result is a base register with an immediate
          // encoded in Opc (Offset is set to register 0), and AM2_SHOP when it is a
          // base register plus/minus an optionally shifted offset register.

          // Special case: the address itself is a multiply by an odd constant. This
          // is only attempted when the subtarget is neither A9-like nor Swift, or
          // when the multiply has a single use.
    764   if (N.getOpcode() == ISD::MUL &&
    765       (!(Subtarget->isLikeA9() || Subtarget->isSwift()) || N.hasOneUse())) {
    766     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    767       // X * [3,5,9] -> X + X * [2,4,8] etc.
    768       int RHSC = (int)RHS->getZExtValue();
    769       if (RHSC & 1) {
                // Clear the low bit; the remaining "1 * X" term is supplied by using
                // X as the base register as well as the offset register.
    770         RHSC = RHSC & ~1;
    771         ARM_AM::AddrOpc AddSub = ARM_AM::add;
    772         if (RHSC < 0) {
    773           AddSub = ARM_AM::sub;
    774           RHSC = - RHSC;
    775         }
    776         if (isPowerOf2_32(RHSC)) {
    777           unsigned ShAmt = Log2_32(RHSC);
    778           Base = Offset = N.getOperand(0);
    779           Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
    780                                                             ARM_AM::lsl),
    781                                           SDLoc(N), MVT::i32);
    782           return AM2_SHOP;
    783         }
    784       }
    785     }
    786   }
    787
          // Not add/sub-like: use N (or a rewritten form of it) as a bare base with a
          // zero immediate.
    788   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
    789       // ISD::OR that is equivalent to an ADD.
    790       !CurDAG->isBaseWithConstantOffset(N)) {
    791     Base = N;
    792     if (N.getOpcode() == ISD::FrameIndex) {
    793       int FI = cast<FrameIndexSDNode>(N)->getIndex();
    794       Base = CurDAG->getTargetFrameIndex(
    795           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    796     } else if (N.getOpcode() == ARMISD::Wrapper &&
    797                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
    798                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
    799                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
              // Look through the wrapper unless it wraps a global address, external
              // symbol or TLS global address.
    800       Base = N.getOperand(0);
    801     }
    802     Offset = CurDAG->getRegister(0, MVT::i32);
    803     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
    804                                                       ARM_AM::no_shift),
    805                                     SDLoc(N), MVT::i32);
    806     return AM2_BASE;
    807   }
    808
    809   // Match simple R +/- imm12 operands.
    810   if (N.getOpcode() != ISD::SUB) {
    811     int RHSC;
    812     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
    813                                 -0x1000+1, 0x1000, RHSC)) { // 12 bits.
    814       Base = N.getOperand(0);
    815       if (Base.getOpcode() == ISD::FrameIndex) {
    816         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    817         Base = CurDAG->getTargetFrameIndex(
    818             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    819       }
    820       Offset = CurDAG->getRegister(0, MVT::i32);
    821
              // The immediate is encoded as a magnitude plus an add/sub direction.
    822       ARM_AM::AddrOpc AddSub = ARM_AM::add;
    823       if (RHSC < 0) {
    824         AddSub = ARM_AM::sub;
    825         RHSC = - RHSC;
    826       }
    827       Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, RHSC,
    828                                                         ARM_AM::no_shift),
    829                                       SDLoc(N), MVT::i32);
    830       return AM2_BASE;
    831     }
    832   }
    833
    834   if ((Subtarget->isLikeA9() || Subtarget->isSwift()) && !N.hasOneUse()) {
    835     // Compute R +/- (R << N) and reuse it.
    836     Base = N;
    837     Offset = CurDAG->getRegister(0, MVT::i32);
    838     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(ARM_AM::add, 0,
    839                                                       ARM_AM::no_shift),
    840                                     SDLoc(N), MVT::i32);
    841     return AM2_BASE;
    842   }
    843
    844   // Otherwise this is R +/- [possibly shifted] R.
    845   ARM_AM::AddrOpc AddSub = N.getOpcode() != ISD::SUB ? ARM_AM::add:ARM_AM::sub;
    846   ARM_AM::ShiftOpc ShOpcVal =
    847     ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
    848   unsigned ShAmt = 0;
    849
          // Default assignment; either may be replaced by the shift folding below.
    850   Base   = N.getOperand(0);
    851   Offset = N.getOperand(1);
    852
    853   if (ShOpcVal != ARM_AM::no_shift) {
    854     // Check to see if the RHS of the shift is a constant, if not, we can't fold
    855     // it.
    856     if (ConstantSDNode *Sh =
    857            dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
    858       ShAmt = Sh->getZExtValue();
    859       if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
                // Fold the shift: the offset register becomes the shift's input.
    860         Offset = N.getOperand(1).getOperand(0);
    861       else {
    862         ShAmt = 0;
    863         ShOpcVal = ARM_AM::no_shift;
    864       }
    865     } else {
    866       ShOpcVal = ARM_AM::no_shift;
    867     }
    868   }
    869
    870   // Try matching (R shl C) + (R).
          // Only attempted for non-SUB nodes when no shift was folded on the RHS, and
          // skipped on A9-like/Swift subtargets or when operand 0 has a single use.
    871   if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
    872       !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
    873         N.getOperand(0).hasOneUse())) {
    874     ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    875     if (ShOpcVal != ARM_AM::no_shift) {
    876       // Check to see if the RHS of the shift is a constant, if not, we can't
    877       // fold it.
    878       if (ConstantSDNode *Sh =
    879           dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
    880         ShAmt = Sh->getZExtValue();
    881         if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
                  // Swap roles: the shifted LHS becomes the offset and the RHS the
                  // base.
    882           Offset = N.getOperand(0).getOperand(0);
    883           Base = N.getOperand(1);
    884         } else {
    885           ShAmt = 0;
    886           ShOpcVal = ARM_AM::no_shift;
    887         }
    888       } else {
    889         ShOpcVal = ARM_AM::no_shift;
    890       }
    891     }
    892   }
    893
    894   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
    895                                   SDLoc(N), MVT::i32);
    896   return AM2_SHOP;
    897 }
    898 
    899 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
    900                                             SDValue &Offset, SDValue &Opc) {
    901   unsigned Opcode = Op->getOpcode();
    902   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    903     ? cast<LoadSDNode>(Op)->getAddressingMode()
    904     : cast<StoreSDNode>(Op)->getAddressingMode();
    905   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    906     ? ARM_AM::add : ARM_AM::sub;
    907   int Val;
    908   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    909     return false;
    910 
    911   Offset = N;
    912   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
    913   unsigned ShAmt = 0;
    914   if (ShOpcVal != ARM_AM::no_shift) {
    915     // Check to see if the RHS of the shift is a constant, if not, we can't fold
    916     // it.
    917     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    918       ShAmt = Sh->getZExtValue();
    919       if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
    920         Offset = N.getOperand(0);
    921       else {
    922         ShAmt = 0;
    923         ShOpcVal = ARM_AM::no_shift;
    924       }
    925     } else {
    926       ShOpcVal = ARM_AM::no_shift;
    927     }
    928   }
    929 
    930   Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
    931                                   SDLoc(N), MVT::i32);
    932   return true;
    933 }
    934 
    935 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
    936                                             SDValue &Offset, SDValue &Opc) {
    937   unsigned Opcode = Op->getOpcode();
    938   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    939     ? cast<LoadSDNode>(Op)->getAddressingMode()
    940     : cast<StoreSDNode>(Op)->getAddressingMode();
    941   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    942     ? ARM_AM::add : ARM_AM::sub;
    943   int Val;
    944   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    945     if (AddSub == ARM_AM::sub) Val *= -1;
    946     Offset = CurDAG->getRegister(0, MVT::i32);
    947     Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);
    948     return true;
    949   }
    950 
    951   return false;
    952 }
    953 
    954 
    955 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
    956                                             SDValue &Offset, SDValue &Opc) {
    957   unsigned Opcode = Op->getOpcode();
    958   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    959     ? cast<LoadSDNode>(Op)->getAddressingMode()
    960     : cast<StoreSDNode>(Op)->getAddressingMode();
    961   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    962     ? ARM_AM::add : ARM_AM::sub;
    963   int Val;
    964   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
    965     Offset = CurDAG->getRegister(0, MVT::i32);
    966     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
    967                                                       ARM_AM::no_shift),
    968                                     SDLoc(Op), MVT::i32);
    969     return true;
    970   }
    971 
    972   return false;
    973 }
    974 
    975 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
          // Accepts any address as-is: N becomes the base and no offset is produced,
          // so the match always succeeds.
    976   Base = N;
    977   return true;
    978 }
    979 
    980 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
    981                                       SDValue &Base, SDValue &Offset,
    982                                       SDValue &Opc) {
    983   if (N.getOpcode() == ISD::SUB) {
    984     // X - C  is canonicalize to X + -C, no need to handle it here.
    985     Base = N.getOperand(0);
    986     Offset = N.getOperand(1);
    987     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
    988                                     MVT::i32);
    989     return true;
    990   }
    991 
    992   if (!CurDAG->isBaseWithConstantOffset(N)) {
    993     Base = N;
    994     if (N.getOpcode() == ISD::FrameIndex) {
    995       int FI = cast<FrameIndexSDNode>(N)->getIndex();
    996       Base = CurDAG->getTargetFrameIndex(
    997           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    998     }
    999     Offset = CurDAG->getRegister(0, MVT::i32);
   1000     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
   1001                                     MVT::i32);
   1002     return true;
   1003   }
   1004 
   1005   // If the RHS is +/- imm8, fold into addr mode.
   1006   int RHSC;
   1007   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
   1008                               -256 + 1, 256, RHSC)) { // 8 bits.
   1009     Base = N.getOperand(0);
   1010     if (Base.getOpcode() == ISD::FrameIndex) {
   1011       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1012       Base = CurDAG->getTargetFrameIndex(
   1013           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1014     }
   1015     Offset = CurDAG->getRegister(0, MVT::i32);
   1016 
   1017     ARM_AM::AddrOpc AddSub = ARM_AM::add;
   1018     if (RHSC < 0) {
   1019       AddSub = ARM_AM::sub;
   1020       RHSC = -RHSC;
   1021     }
   1022     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
   1023                                     MVT::i32);
   1024     return true;
   1025   }
   1026 
   1027   Base = N.getOperand(0);
   1028   Offset = N.getOperand(1);
   1029   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
   1030                                   MVT::i32);
   1031   return true;
   1032 }
   1033 
   1034 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
   1035                                             SDValue &Offset, SDValue &Opc) {
   1036   unsigned Opcode = Op->getOpcode();
   1037   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
   1038     ? cast<LoadSDNode>(Op)->getAddressingMode()
   1039     : cast<StoreSDNode>(Op)->getAddressingMode();
   1040   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
   1041     ? ARM_AM::add : ARM_AM::sub;
   1042   int Val;
   1043   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.
   1044     Offset = CurDAG->getRegister(0, MVT::i32);
   1045     Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
   1046                                     MVT::i32);
   1047     return true;
   1048   }
   1049 
   1050   Offset = N;
   1051   Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
   1052                                   MVT::i32);
   1053   return true;
   1054 }
   1055 
   1056 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
   1057                                       SDValue &Base, SDValue &Offset) {
   1058   if (!CurDAG->isBaseWithConstantOffset(N)) {
   1059     Base = N;
   1060     if (N.getOpcode() == ISD::FrameIndex) {
   1061       int FI = cast<FrameIndexSDNode>(N)->getIndex();
   1062       Base = CurDAG->getTargetFrameIndex(
   1063           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1064     } else if (N.getOpcode() == ARMISD::Wrapper &&
   1065                N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
   1066                N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
   1067                N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
   1068       Base = N.getOperand(0);
   1069     }
   1070     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
   1071                                        SDLoc(N), MVT::i32);
   1072     return true;
   1073   }
   1074 
   1075   // If the RHS is +/- imm8, fold into addr mode.
   1076   int RHSC;
   1077   if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4,
   1078                               -256 + 1, 256, RHSC)) {
   1079     Base = N.getOperand(0);
   1080     if (Base.getOpcode() == ISD::FrameIndex) {
   1081       int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1082       Base = CurDAG->getTargetFrameIndex(
   1083           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1084     }
   1085 
   1086     ARM_AM::AddrOpc AddSub = ARM_AM::add;
   1087     if (RHSC < 0) {
   1088       AddSub = ARM_AM::sub;
   1089       RHSC = -RHSC;
   1090     }
   1091     Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
   1092                                        SDLoc(N), MVT::i32);
   1093     return true;
   1094   }
   1095 
   1096   Base = N;
   1097   Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
   1098                                      SDLoc(N), MVT::i32);
   1099   return true;
   1100 }
   1101 
   1102 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
   1103                                       SDValue &Align) {
   1104   Addr = N;
   1105 
   1106   unsigned Alignment = 0;
   1107 
   1108   MemSDNode *MemN = cast<MemSDNode>(Parent);
   1109 
   1110   if (isa<LSBaseSDNode>(MemN) ||
   1111       ((MemN->getOpcode() == ARMISD::VST1_UPD ||
   1112         MemN->getOpcode() == ARMISD::VLD1_UPD) &&
   1113        MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
   1114     // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
   1115     // The maximum alignment is equal to the memory size being referenced.
   1116     unsigned MMOAlign = MemN->getAlignment();
   1117     unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
   1118     if (MMOAlign >= MemSize && MemSize > 1)
   1119       Alignment = MemSize;
   1120   } else {
   1121     // All other uses of addrmode6 are for intrinsics.  For now just record
   1122     // the raw alignment value; it will be refined later based on the legal
   1123     // alignment operands for the intrinsic.
   1124     Alignment = MemN->getAlignment();
   1125   }
   1126 
   1127   Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
   1128   return true;
   1129 }
   1130 
   1131 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
   1132                                             SDValue &Offset) {
   1133   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
   1134   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
   1135   if (AM != ISD::POST_INC)
   1136     return false;
   1137   Offset = N;
   1138   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
   1139     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
   1140       Offset = CurDAG->getRegister(0, MVT::i32);
   1141   }
   1142   return true;
   1143 }
   1144 
   1145 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
   1146                                        SDValue &Offset, SDValue &Label) {
   1147   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
   1148     Offset = N.getOperand(0);
   1149     SDValue N1 = N.getOperand(1);
   1150     Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
   1151                                       SDLoc(N), MVT::i32);
   1152     return true;
   1153   }
   1154 
   1155   return false;
   1156 }
   1157 
   1158 
   1159 //===----------------------------------------------------------------------===//
   1160 //                         Thumb Addressing Modes
   1161 //===----------------------------------------------------------------------===//
   1162 
   1163 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N,
   1164                                             SDValue &Base, SDValue &Offset){
   1165   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
   1166     ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N);
   1167     if (!NC || !NC->isNullValue())
   1168       return false;
   1169 
   1170     Base = Offset = N;
   1171     return true;
   1172   }
   1173 
   1174   Base = N.getOperand(0);
   1175   Offset = N.getOperand(1);
   1176   return true;
   1177 }
   1178 
   1179 bool
   1180 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
   1181                                           SDValue &Base, SDValue &OffImm) {
   1182   if (!CurDAG->isBaseWithConstantOffset(N)) {
   1183     if (N.getOpcode() == ISD::ADD) {
   1184       return false; // We want to select register offset instead
   1185     } else if (N.getOpcode() == ARMISD::Wrapper &&
   1186         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
   1187         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
   1188         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
   1189       Base = N.getOperand(0);
   1190     } else {
   1191       Base = N;
   1192     }
   1193 
   1194     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1195     return true;
   1196   }
   1197 
   1198   // If the RHS is + imm5 * scale, fold into addr mode.
   1199   int RHSC;
   1200   if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
   1201     Base = N.getOperand(0);
   1202     OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1203     return true;
   1204   }
   1205 
   1206   // Offset is too large, so use register offset instead.
   1207   return false;
   1208 }
   1209 
   1210 bool
   1211 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
   1212                                            SDValue &OffImm) {
          // Forwards to SelectThumbAddrModeImm5S with a fixed scale of 4.
   1213   return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
   1214 }
   1215 
   1216 bool
   1217 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
   1218                                            SDValue &OffImm) {
          // Forwards to SelectThumbAddrModeImm5S with a fixed scale of 2.
   1219   return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
   1220 }
   1221 
   1222 bool
   1223 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
   1224                                            SDValue &OffImm) {
          // Forwards to SelectThumbAddrModeImm5S with a fixed scale of 1.
   1225   return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
   1226 }
   1227 
   1228 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
   1229                                             SDValue &Base, SDValue &OffImm) {
   1230   if (N.getOpcode() == ISD::FrameIndex) {
   1231     int FI = cast<FrameIndexSDNode>(N)->getIndex();
   1232     // Only multiples of 4 are allowed for the offset, so the frame object
   1233     // alignment must be at least 4.
   1234     MachineFrameInfo *MFI = MF->getFrameInfo();
   1235     if (MFI->getObjectAlignment(FI) < 4)
   1236       MFI->setObjectAlignment(FI, 4);
   1237     Base = CurDAG->getTargetFrameIndex(
   1238         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1239     OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1240     return true;
   1241   }
   1242 
   1243   if (!CurDAG->isBaseWithConstantOffset(N))
   1244     return false;
   1245 
   1246   RegisterSDNode *LHSR = dyn_cast<RegisterSDNode>(N.getOperand(0));
   1247   if (N.getOperand(0).getOpcode() == ISD::FrameIndex ||
   1248       (LHSR && LHSR->getReg() == ARM::SP)) {
   1249     // If the RHS is + imm8 * scale, fold into addr mode.
   1250     int RHSC;
   1251     if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
   1252       Base = N.getOperand(0);
   1253       if (Base.getOpcode() == ISD::FrameIndex) {
   1254         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1255         // For LHS+RHS to result in an offset that's a multiple of 4 the object
   1256         // indexed by the LHS must be 4-byte aligned.
   1257         MachineFrameInfo *MFI = MF->getFrameInfo();
   1258         if (MFI->getObjectAlignment(FI) < 4)
   1259           MFI->setObjectAlignment(FI, 4);
   1260         Base = CurDAG->getTargetFrameIndex(
   1261             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1262       }
   1263       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1264       return true;
   1265     }
   1266   }
   1267 
   1268   return false;
   1269 }
   1270 
   1271 
   1272 //===----------------------------------------------------------------------===//
   1273 //                        Thumb 2 Addressing Modes
   1274 //===----------------------------------------------------------------------===//
   1275 
   1276 
   1277 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
   1278                                             SDValue &Base, SDValue &OffImm) {
   1279   // Match simple R + imm12 operands.
   1280 
   1281   // Base only.
   1282   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
   1283       !CurDAG->isBaseWithConstantOffset(N)) {
   1284     if (N.getOpcode() == ISD::FrameIndex) {
   1285       // Match frame index.
   1286       int FI = cast<FrameIndexSDNode>(N)->getIndex();
   1287       Base = CurDAG->getTargetFrameIndex(
   1288           FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1289       OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1290       return true;
   1291     }
   1292 
   1293     if (N.getOpcode() == ARMISD::Wrapper &&
   1294         N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
   1295         N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
   1296         N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
   1297       Base = N.getOperand(0);
   1298       if (Base.getOpcode() == ISD::TargetConstantPool)
   1299         return false;  // We want to select t2LDRpci instead.
   1300     } else
   1301       Base = N;
   1302     OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1303     return true;
   1304   }
   1305 
   1306   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
   1307     if (SelectT2AddrModeImm8(N, Base, OffImm))
   1308       // Let t2LDRi8 handle (R - imm8).
   1309       return false;
   1310 
   1311     int RHSC = (int)RHS->getZExtValue();
   1312     if (N.getOpcode() == ISD::SUB)
   1313       RHSC = -RHSC;
   1314 
   1315     if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
   1316       Base   = N.getOperand(0);
   1317       if (Base.getOpcode() == ISD::FrameIndex) {
   1318         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1319         Base = CurDAG->getTargetFrameIndex(
   1320             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1321       }
   1322       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1323       return true;
   1324     }
   1325   }
   1326 
   1327   // Base only.
   1328   Base = N;
   1329   OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1330   return true;
   1331 }
   1332 
   1333 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
   1334                                            SDValue &Base, SDValue &OffImm) {
   1335   // Match simple R - imm8 operands.
   1336   if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
   1337       !CurDAG->isBaseWithConstantOffset(N))
   1338     return false;
   1339 
   1340   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
   1341     int RHSC = (int)RHS->getSExtValue();
   1342     if (N.getOpcode() == ISD::SUB)
   1343       RHSC = -RHSC;
   1344 
   1345     if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
   1346       Base = N.getOperand(0);
   1347       if (Base.getOpcode() == ISD::FrameIndex) {
   1348         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1349         Base = CurDAG->getTargetFrameIndex(
   1350             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1351       }
   1352       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);
   1353       return true;
   1354     }
   1355   }
   1356 
   1357   return false;
   1358 }
   1359 
   1360 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
   1361                                                  SDValue &OffImm){
   1362   unsigned Opcode = Op->getOpcode();
   1363   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
   1364     ? cast<LoadSDNode>(Op)->getAddressingMode()
   1365     : cast<StoreSDNode>(Op)->getAddressingMode();
   1366   int RHSC;
   1367   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
   1368     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
   1369       ? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)
   1370       : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);
   1371     return true;
   1372   }
   1373 
   1374   return false;
   1375 }
   1376 
   1377 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
   1378                                             SDValue &Base,
   1379                                             SDValue &OffReg, SDValue &ShImm) {
          // Tries to match N as a Thumb2 base register plus an offset register that
          // may be shifted left by a small constant. On success Base and OffReg
          // receive the two registers and ShImm the shift amount as an i32 target
          // constant.
   1380   // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
   1381   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
   1382     return false;
   1383
   1384   // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
   1385   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
   1386     int RHSC = (int)RHS->getZExtValue();
   1387     if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
   1388       return false;
   1389     else if (RHSC < 0 && RHSC >= -255) // 8 bits
   1390       return false;
   1391   }
   1392
   1393   // Look for (R + R) or (R + (R << [1,2,3])).
   1394   unsigned ShAmt = 0;
   1395   Base   = N.getOperand(0);
   1396   OffReg = N.getOperand(1);
   1397
   1398   // Swap if it is ((R << c) + R).
          // Only lsl is considered; after this, ShOpcVal is lsl exactly when OffReg
          // is the shift node.
   1399   ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
   1400   if (ShOpcVal != ARM_AM::lsl) {
   1401     ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
   1402     if (ShOpcVal == ARM_AM::lsl)
   1403       std::swap(Base, OffReg);
   1404   }
   1405
   1406   if (ShOpcVal == ARM_AM::lsl) {
   1407     // Check to see if the RHS of the shift is a constant, if not, we can't fold
   1408     // it.
   1409     if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
   1410       ShAmt = Sh->getZExtValue();
              // Shift amounts of 4 or more are not folded; the shift node then stays
              // as the offset register with a zero shift amount.
   1411       if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
   1412         OffReg = OffReg.getOperand(0);
   1413       else {
   1414         ShAmt = 0;
   1415       }
   1416     }
   1417   }
   1418
   1419   // If OffReg is a multiply-by-constant and it's profitable to extract a shift
   1420   // and use it in a shifted operand do so.
   1421   if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
   1422     unsigned PowerOfTwo = 0;
   1423     SDValue NewMulConst;
            // canExtractShiftFromMul is defined elsewhere in this file; presumably 3
            // is the largest shift amount it may extract - TODO confirm.
   1424     if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
              // Rewrites the multiply's constant operand in the DAG and uses the
              // extracted power of two as the shift amount.
   1425       replaceDAGValue(OffReg.getOperand(1), NewMulConst);
   1426       ShAmt = PowerOfTwo;
   1427     }
   1428   }
   1429
   1430   ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);
   1431
   1432   return true;
   1433 }
   1434 
   1435 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
   1436                                                 SDValue &OffImm) {
   1437   // This *must* succeed since it's used for the irreplaceable ldrex and strex
   1438   // instructions.
   1439   Base = N;
   1440   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
   1441 
   1442   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
   1443     return true;
   1444 
   1445   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
   1446   if (!RHS)
   1447     return true;
   1448 
   1449   uint32_t RHSC = (int)RHS->getZExtValue();
   1450   if (RHSC > 1020 || RHSC % 4 != 0)
   1451     return true;
   1452 
   1453   Base = N.getOperand(0);
   1454   if (Base.getOpcode() == ISD::FrameIndex) {
   1455     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
   1456     Base = CurDAG->getTargetFrameIndex(
   1457         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   1458   }
   1459 
   1460   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
   1461   return true;
   1462 }
   1463 
   1464 //===--------------------------------------------------------------------===//
   1465 
   1466 /// getAL - Returns a ARMCC::AL immediate node.
   1467 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
   1468   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
   1469 }
   1470 
   1471 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
   1472   LoadSDNode *LD = cast<LoadSDNode>(N);
   1473   ISD::MemIndexedMode AM = LD->getAddressingMode();
   1474   if (AM == ISD::UNINDEXED)
   1475     return false;
   1476 
   1477   EVT LoadedVT = LD->getMemoryVT();
   1478   SDValue Offset, AMOpc;
   1479   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
   1480   unsigned Opcode = 0;
   1481   bool Match = false;
   1482   if (LoadedVT == MVT::i32 && isPre &&
   1483       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
   1484     Opcode = ARM::LDR_PRE_IMM;
   1485     Match = true;
   1486   } else if (LoadedVT == MVT::i32 && !isPre &&
   1487       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
   1488     Opcode = ARM::LDR_POST_IMM;
   1489     Match = true;
   1490   } else if (LoadedVT == MVT::i32 &&
   1491       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
   1492     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
   1493     Match = true;
   1494 
   1495   } else if (LoadedVT == MVT::i16 &&
   1496              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
   1497     Match = true;
   1498     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
   1499       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
   1500       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
   1501   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
   1502     if (LD->getExtensionType() == ISD::SEXTLOAD) {
   1503       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
   1504         Match = true;
   1505         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
   1506       }
   1507     } else {
   1508       if (isPre &&
   1509           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
   1510         Match = true;
   1511         Opcode = ARM::LDRB_PRE_IMM;
   1512       } else if (!isPre &&
   1513                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
   1514         Match = true;
   1515         Opcode = ARM::LDRB_POST_IMM;
   1516       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
   1517         Match = true;
   1518         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
   1519       }
   1520     }
   1521   }
   1522 
   1523   if (Match) {
   1524     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
   1525       SDValue Chain = LD->getChain();
   1526       SDValue Base = LD->getBasePtr();
   1527       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
   1528                        CurDAG->getRegister(0, MVT::i32), Chain };
   1529       ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
   1530                                             MVT::i32, MVT::Other, Ops));
   1531       return true;
   1532     } else {
   1533       SDValue Chain = LD->getChain();
   1534       SDValue Base = LD->getBasePtr();
   1535       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
   1536                        CurDAG->getRegister(0, MVT::i32), Chain };
   1537       ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
   1538                                             MVT::i32, MVT::Other, Ops));
   1539       return true;
   1540     }
   1541   }
   1542 
   1543   return false;
   1544 }
   1545 
   1546 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
   1547   LoadSDNode *LD = cast<LoadSDNode>(N);
   1548   ISD::MemIndexedMode AM = LD->getAddressingMode();
   1549   if (AM == ISD::UNINDEXED)
   1550     return false;
   1551 
   1552   EVT LoadedVT = LD->getMemoryVT();
   1553   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
   1554   SDValue Offset;
   1555   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
   1556   unsigned Opcode = 0;
   1557   bool Match = false;
   1558   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
   1559     switch (LoadedVT.getSimpleVT().SimpleTy) {
   1560     case MVT::i32:
   1561       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
   1562       break;
   1563     case MVT::i16:
   1564       if (isSExtLd)
   1565         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
   1566       else
   1567         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
   1568       break;
   1569     case MVT::i8:
   1570     case MVT::i1:
   1571       if (isSExtLd)
   1572         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
   1573       else
   1574         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
   1575       break;
   1576     default:
   1577       return false;
   1578     }
   1579     Match = true;
   1580   }
   1581 
   1582   if (Match) {
   1583     SDValue Chain = LD->getChain();
   1584     SDValue Base = LD->getBasePtr();
   1585     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
   1586                      CurDAG->getRegister(0, MVT::i32), Chain };
   1587     ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
   1588                                           MVT::Other, Ops));
   1589     return true;
   1590   }
   1591 
   1592   return false;
   1593 }
   1594 
   1595 /// \brief Form a GPRPair pseudo register from a pair of GPR regs.
   1596 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
   1597   SDLoc dl(V0.getNode());
   1598   SDValue RegClass =
   1599     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
   1600   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
   1601   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
   1602   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1603   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1604 }
   1605 
   1606 /// \brief Form a D register from a pair of S registers.
   1607 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   1608   SDLoc dl(V0.getNode());
   1609   SDValue RegClass =
   1610     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
   1611   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
   1612   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
   1613   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1614   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1615 }
   1616 
   1617 /// \brief Form a quad register from a pair of D registers.
   1618 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   1619   SDLoc dl(V0.getNode());
   1620   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
   1621                                                MVT::i32);
   1622   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
   1623   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
   1624   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1625   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1626 }
   1627 
   1628 /// \brief Form 4 consecutive D registers from a pair of Q registers.
   1629 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
   1630   SDLoc dl(V0.getNode());
   1631   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
   1632                                                MVT::i32);
   1633   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
   1634   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
   1635   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
   1636   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1637 }
   1638 
   1639 /// \brief Form 4 consecutive S registers.
   1640 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
   1641                                    SDValue V2, SDValue V3) {
   1642   SDLoc dl(V0.getNode());
   1643   SDValue RegClass =
   1644     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
   1645   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
   1646   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
   1647   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
   1648   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
   1649   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
   1650                                     V2, SubReg2, V3, SubReg3 };
   1651   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1652 }
   1653 
   1654 /// \brief Form 4 consecutive D registers.
   1655 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
   1656                                    SDValue V2, SDValue V3) {
   1657   SDLoc dl(V0.getNode());
   1658   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
   1659                                                MVT::i32);
   1660   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
   1661   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
   1662   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
   1663   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
   1664   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
   1665                                     V2, SubReg2, V3, SubReg3 };
   1666   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1667 }
   1668 
   1669 /// \brief Form 4 consecutive Q registers.
   1670 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
   1671                                    SDValue V2, SDValue V3) {
   1672   SDLoc dl(V0.getNode());
   1673   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
   1674                                                MVT::i32);
   1675   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
   1676   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
   1677   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
   1678   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
   1679   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
   1680                                     V2, SubReg2, V3, SubReg3 };
   1681   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
   1682 }
   1683 
   1684 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
   1685 /// of a NEON VLD or VST instruction.  The supported values depend on the
   1686 /// number of registers being loaded.
   1687 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
   1688                                        unsigned NumVecs, bool is64BitVector) {
   1689   unsigned NumRegs = NumVecs;
   1690   if (!is64BitVector && NumVecs < 3)
   1691     NumRegs *= 2;
   1692 
   1693   unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
   1694   if (Alignment >= 32 && NumRegs == 4)
   1695     Alignment = 32;
   1696   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
   1697     Alignment = 16;
   1698   else if (Alignment >= 8)
   1699     Alignment = 8;
   1700   else
   1701     Alignment = 0;
   1702 
   1703   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
   1704 }
   1705 
   1706 static bool isVLDfixed(unsigned Opc)
   1707 {
   1708   switch (Opc) {
   1709   default: return false;
   1710   case ARM::VLD1d8wb_fixed : return true;
   1711   case ARM::VLD1d16wb_fixed : return true;
   1712   case ARM::VLD1d64Qwb_fixed : return true;
   1713   case ARM::VLD1d32wb_fixed : return true;
   1714   case ARM::VLD1d64wb_fixed : return true;
   1715   case ARM::VLD1d64TPseudoWB_fixed : return true;
   1716   case ARM::VLD1d64QPseudoWB_fixed : return true;
   1717   case ARM::VLD1q8wb_fixed : return true;
   1718   case ARM::VLD1q16wb_fixed : return true;
   1719   case ARM::VLD1q32wb_fixed : return true;
   1720   case ARM::VLD1q64wb_fixed : return true;
   1721   case ARM::VLD2d8wb_fixed : return true;
   1722   case ARM::VLD2d16wb_fixed : return true;
   1723   case ARM::VLD2d32wb_fixed : return true;
   1724   case ARM::VLD2q8PseudoWB_fixed : return true;
   1725   case ARM::VLD2q16PseudoWB_fixed : return true;
   1726   case ARM::VLD2q32PseudoWB_fixed : return true;
   1727   case ARM::VLD2DUPd8wb_fixed : return true;
   1728   case ARM::VLD2DUPd16wb_fixed : return true;
   1729   case ARM::VLD2DUPd32wb_fixed : return true;
   1730   }
   1731 }
   1732 
   1733 static bool isVSTfixed(unsigned Opc)
   1734 {
   1735   switch (Opc) {
   1736   default: return false;
   1737   case ARM::VST1d8wb_fixed : return true;
   1738   case ARM::VST1d16wb_fixed : return true;
   1739   case ARM::VST1d32wb_fixed : return true;
   1740   case ARM::VST1d64wb_fixed : return true;
   1741   case ARM::VST1q8wb_fixed : return true;
   1742   case ARM::VST1q16wb_fixed : return true;
   1743   case ARM::VST1q32wb_fixed : return true;
   1744   case ARM::VST1q64wb_fixed : return true;
   1745   case ARM::VST1d64TPseudoWB_fixed : return true;
   1746   case ARM::VST1d64QPseudoWB_fixed : return true;
   1747   case ARM::VST2d8wb_fixed : return true;
   1748   case ARM::VST2d16wb_fixed : return true;
   1749   case ARM::VST2d32wb_fixed : return true;
   1750   case ARM::VST2q8PseudoWB_fixed : return true;
   1751   case ARM::VST2q16PseudoWB_fixed : return true;
   1752   case ARM::VST2q32PseudoWB_fixed : return true;
   1753   }
   1754 }
   1755 
   1756 // Get the register stride update opcode of a VLD/VST instruction that
   1757 // is otherwise equivalent to the given fixed stride updating instruction.
   1758 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
   1759   assert((isVLDfixed(Opc) || isVSTfixed(Opc))
   1760     && "Incorrect fixed stride updating instruction.");
   1761   switch (Opc) {
   1762   default: break;
   1763   case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
   1764   case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
   1765   case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
   1766   case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
   1767   case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
   1768   case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
   1769   case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
   1770   case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
   1771   case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
   1772   case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
   1773   case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
   1774   case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
   1775 
   1776   case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
   1777   case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
   1778   case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
   1779   case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
   1780   case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
   1781   case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
   1782   case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
   1783   case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
   1784   case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
   1785   case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;
   1786 
   1787   case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
   1788   case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
   1789   case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
   1790   case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
   1791   case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
   1792   case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;
   1793 
   1794   case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
   1795   case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
   1796   case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
   1797   case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
   1798   case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
   1799   case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;
   1800 
   1801   case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
   1802   case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
   1803   case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
   1804   }
   1805   return Opc; // If not one we handle, return it unchanged.
   1806 }
   1807 
   1808 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
   1809                                 const uint16_t *DOpcodes,
   1810                                 const uint16_t *QOpcodes0,
   1811                                 const uint16_t *QOpcodes1) {
   1812   assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
   1813   SDLoc dl(N);
   1814 
   1815   SDValue MemAddr, Align;
   1816   unsigned AddrOpIdx = isUpdating ? 1 : 2;
   1817   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
   1818     return;
   1819 
   1820   SDValue Chain = N->getOperand(0);
   1821   EVT VT = N->getValueType(0);
   1822   bool is64BitVector = VT.is64BitVector();
   1823   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
   1824 
   1825   unsigned OpcodeIndex;
   1826   switch (VT.getSimpleVT().SimpleTy) {
   1827   default: llvm_unreachable("unhandled vld type");
   1828     // Double-register operations:
   1829   case MVT::v8i8:  OpcodeIndex = 0; break;
   1830   case MVT::v4i16: OpcodeIndex = 1; break;
   1831   case MVT::v2f32:
   1832   case MVT::v2i32: OpcodeIndex = 2; break;
   1833   case MVT::v1i64: OpcodeIndex = 3; break;
   1834     // Quad-register operations:
   1835   case MVT::v16i8: OpcodeIndex = 0; break;
   1836   case MVT::v8i16: OpcodeIndex = 1; break;
   1837   case MVT::v4f32:
   1838   case MVT::v4i32: OpcodeIndex = 2; break;
   1839   case MVT::v2f64:
   1840   case MVT::v2i64: OpcodeIndex = 3;
   1841     assert(NumVecs == 1 && "v2i64 type only supported for VLD1");
   1842     break;
   1843   }
   1844 
   1845   EVT ResTy;
   1846   if (NumVecs == 1)
   1847     ResTy = VT;
   1848   else {
   1849     unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
   1850     if (!is64BitVector)
   1851       ResTyElts *= 2;
   1852     ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
   1853   }
   1854   std::vector<EVT> ResTys;
   1855   ResTys.push_back(ResTy);
   1856   if (isUpdating)
   1857     ResTys.push_back(MVT::i32);
   1858   ResTys.push_back(MVT::Other);
   1859 
   1860   SDValue Pred = getAL(CurDAG, dl);
   1861   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   1862   SDNode *VLd;
   1863   SmallVector<SDValue, 7> Ops;
   1864 
   1865   // Double registers and VLD1/VLD2 quad registers are directly supported.
   1866   if (is64BitVector || NumVecs <= 2) {
   1867     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
   1868                     QOpcodes0[OpcodeIndex]);
   1869     Ops.push_back(MemAddr);
   1870     Ops.push_back(Align);
   1871     if (isUpdating) {
   1872       SDValue Inc = N->getOperand(AddrOpIdx + 1);
   1873       // FIXME: VLD1/VLD2 fixed increment doesn't need Reg0. Remove the reg0
   1874       // case entirely when the rest are updated to that form, too.
   1875       if ((NumVecs <= 2) && !isa<ConstantSDNode>(Inc.getNode()))
   1876         Opc = getVLDSTRegisterUpdateOpcode(Opc);
   1877       // FIXME: We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
   1878       // check for that explicitly too. Horribly hacky, but temporary.
   1879       if ((NumVecs > 2 && !isVLDfixed(Opc)) ||
   1880           !isa<ConstantSDNode>(Inc.getNode()))
   1881         Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
   1882     }
   1883     Ops.push_back(Pred);
   1884     Ops.push_back(Reg0);
   1885     Ops.push_back(Chain);
   1886     VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1887 
   1888   } else {
   1889     // Otherwise, quad registers are loaded with two separate instructions,
   1890     // where one loads the even registers and the other loads the odd registers.
   1891     EVT AddrTy = MemAddr.getValueType();
   1892 
   1893     // Load the even subregs.  This is always an updating load, so that it
   1894     // provides the address to the second load for the odd subregs.
   1895     SDValue ImplDef =
   1896       SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
   1897     const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
   1898     SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
   1899                                           ResTy, AddrTy, MVT::Other, OpsA);
   1900     Chain = SDValue(VLdA, 2);
   1901 
   1902     // Load the odd subregs.
   1903     Ops.push_back(SDValue(VLdA, 1));
   1904     Ops.push_back(Align);
   1905     if (isUpdating) {
   1906       SDValue Inc = N->getOperand(AddrOpIdx + 1);
   1907       assert(isa<ConstantSDNode>(Inc.getNode()) &&
   1908              "only constant post-increment update allowed for VLD3/4");
   1909       (void)Inc;
   1910       Ops.push_back(Reg0);
   1911     }
   1912     Ops.push_back(SDValue(VLdA, 0));
   1913     Ops.push_back(Pred);
   1914     Ops.push_back(Reg0);
   1915     Ops.push_back(Chain);
   1916     VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
   1917   }
   1918 
   1919   // Transfer memoperands.
   1920   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1921   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1922   cast<MachineSDNode>(VLd)->setMemRefs(MemOp, MemOp + 1);
   1923 
   1924   if (NumVecs == 1) {
   1925     ReplaceNode(N, VLd);
   1926     return;
   1927   }
   1928 
   1929   // Extract out the subregisters.
   1930   SDValue SuperReg = SDValue(VLd, 0);
   1931   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
   1932                     ARM::qsub_3 == ARM::qsub_0 + 3,
   1933                 "Unexpected subreg numbering");
   1934   unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
   1935   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
   1936     ReplaceUses(SDValue(N, Vec),
   1937                 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
   1938   ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
   1939   if (isUpdating)
   1940     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
   1941   CurDAG->RemoveDeadNode(N);
   1942 }
   1943 
   1944 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
   1945                                 const uint16_t *DOpcodes,
   1946                                 const uint16_t *QOpcodes0,
   1947                                 const uint16_t *QOpcodes1) {
   1948   assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
   1949   SDLoc dl(N);
   1950 
   1951   SDValue MemAddr, Align;
   1952   unsigned AddrOpIdx = isUpdating ? 1 : 2;
   1953   unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
   1954   if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
   1955     return;
   1956 
   1957   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1958   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1959 
   1960   SDValue Chain = N->getOperand(0);
   1961   EVT VT = N->getOperand(Vec0Idx).getValueType();
   1962   bool is64BitVector = VT.is64BitVector();
   1963   Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);
   1964 
   1965   unsigned OpcodeIndex;
   1966   switch (VT.getSimpleVT().SimpleTy) {
   1967   default: llvm_unreachable("unhandled vst type");
   1968     // Double-register operations:
   1969   case MVT::v8i8:  OpcodeIndex = 0; break;
   1970   case MVT::v4i16: OpcodeIndex = 1; break;
   1971   case MVT::v2f32:
   1972   case MVT::v2i32: OpcodeIndex = 2; break;
   1973   case MVT::v1i64: OpcodeIndex = 3; break;
   1974     // Quad-register operations:
   1975   case MVT::v16i8: OpcodeIndex = 0; break;
   1976   case MVT::v8i16: OpcodeIndex = 1; break;
   1977   case MVT::v4f32:
   1978   case MVT::v4i32: OpcodeIndex = 2; break;
   1979   case MVT::v2f64:
   1980   case MVT::v2i64: OpcodeIndex = 3;
   1981     assert(NumVecs == 1 && "v2i64 type only supported for VST1");
   1982     break;
   1983   }
   1984 
   1985   std::vector<EVT> ResTys;
   1986   if (isUpdating)
   1987     ResTys.push_back(MVT::i32);
   1988   ResTys.push_back(MVT::Other);
   1989 
   1990   SDValue Pred = getAL(CurDAG, dl);
   1991   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   1992   SmallVector<SDValue, 7> Ops;
   1993 
   1994   // Double registers and VST1/VST2 quad registers are directly supported.
   1995   if (is64BitVector || NumVecs <= 2) {
   1996     SDValue SrcReg;
   1997     if (NumVecs == 1) {
   1998       SrcReg = N->getOperand(Vec0Idx);
   1999     } else if (is64BitVector) {
   2000       // Form a REG_SEQUENCE to force register allocation.
   2001       SDValue V0 = N->getOperand(Vec0Idx + 0);
   2002       SDValue V1 = N->getOperand(Vec0Idx + 1);
   2003       if (NumVecs == 2)
   2004         SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
   2005       else {
   2006         SDValue V2 = N->getOperand(Vec0Idx + 2);
   2007         // If it's a vst3, form a quad D-register and leave the last part as
   2008         // an undef.
   2009         SDValue V3 = (NumVecs == 3)
   2010           ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
   2011           : N->getOperand(Vec0Idx + 3);
   2012         SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
   2013       }
   2014     } else {
   2015       // Form a QQ register.
   2016       SDValue Q0 = N->getOperand(Vec0Idx);
   2017       SDValue Q1 = N->getOperand(Vec0Idx + 1);
   2018       SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
   2019     }
   2020 
   2021     unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
   2022                     QOpcodes0[OpcodeIndex]);
   2023     Ops.push_back(MemAddr);
   2024     Ops.push_back(Align);
   2025     if (isUpdating) {
   2026       SDValue Inc = N->getOperand(AddrOpIdx + 1);
   2027       // FIXME: VST1/VST2 fixed increment doesn't need Reg0. Remove the reg0
   2028       // case entirely when the rest are updated to that form, too.
   2029       if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
   2030         Opc = getVLDSTRegisterUpdateOpcode(Opc);
   2031       // FIXME: We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so
   2032       // check for that explicitly too. Horribly hacky, but temporary.
   2033       if  (!isa<ConstantSDNode>(Inc.getNode()))
   2034         Ops.push_back(Inc);
   2035       else if (NumVecs > 2 && !isVSTfixed(Opc))
   2036         Ops.push_back(Reg0);
   2037     }
   2038     Ops.push_back(SrcReg);
   2039     Ops.push_back(Pred);
   2040     Ops.push_back(Reg0);
   2041     Ops.push_back(Chain);
   2042     SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   2043 
   2044     // Transfer memoperands.
   2045     cast<MachineSDNode>(VSt)->setMemRefs(MemOp, MemOp + 1);
   2046 
   2047     ReplaceNode(N, VSt);
   2048     return;
   2049   }
   2050 
   2051   // Otherwise, quad registers are stored with two separate instructions,
   2052   // where one stores the even registers and the other stores the odd registers.
   2053 
   2054   // Form the QQQQ REG_SEQUENCE.
   2055   SDValue V0 = N->getOperand(Vec0Idx + 0);
   2056   SDValue V1 = N->getOperand(Vec0Idx + 1);
   2057   SDValue V2 = N->getOperand(Vec0Idx + 2);
   2058   SDValue V3 = (NumVecs == 3)
   2059     ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
   2060     : N->getOperand(Vec0Idx + 3);
   2061   SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
   2062 
   2063   // Store the even D registers.  This is always an updating store, so that it
   2064   // provides the address to the second store for the odd subregs.
   2065   const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
   2066   SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
   2067                                         MemAddr.getValueType(),
   2068                                         MVT::Other, OpsA);
   2069   cast<MachineSDNode>(VStA)->setMemRefs(MemOp, MemOp + 1);
   2070   Chain = SDValue(VStA, 1);
   2071 
   2072   // Store the odd D registers.
   2073   Ops.push_back(SDValue(VStA, 0));
   2074   Ops.push_back(Align);
   2075   if (isUpdating) {
   2076     SDValue Inc = N->getOperand(AddrOpIdx + 1);
   2077     assert(isa<ConstantSDNode>(Inc.getNode()) &&
   2078            "only constant post-increment update allowed for VST3/4");
   2079     (void)Inc;
   2080     Ops.push_back(Reg0);
   2081   }
   2082   Ops.push_back(RegSeq);
   2083   Ops.push_back(Pred);
   2084   Ops.push_back(Reg0);
   2085   Ops.push_back(Chain);
   2086   SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
   2087                                         Ops);
   2088   cast<MachineSDNode>(VStB)->setMemRefs(MemOp, MemOp + 1);
   2089   ReplaceNode(N, VStB);
   2090 }
   2091 
/// Select a machine node for a vld/vst "lane" operation on NumVecs vectors.
///
/// \param N          Node being selected. It is cast to MemIntrinsicSDNode to
///                   obtain the memory operand attached to the new node.
/// \param IsLoad     True for a lane load, false for a lane store.
/// \param isUpdating True when the node carries an increment operand and
///                   produces an extra i32 result.
/// \param NumVecs    Number of vectors involved; must be 2, 3 or 4.
/// \param DOpcodes   Opcode table indexed when the vector type is 64 bits wide.
/// \param QOpcodes   Opcode table indexed for all other vector types.
///
/// If SelectAddrMode6 cannot match the address operand, the function returns
/// without touching N.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  // The address operand is at index 1 for updating forms and 2 otherwise; the
  // first vector operand is at index 3 in both cases.
  unsigned AddrOpIdx = isUpdating ? 1 : 2;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  // Carry the original node's memory operand over to the machine node.
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is the constant operand that follows the vector operands.
  unsigned Lane =
    cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue();
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the alignment operand. For NumVecs == 3 it is always 0. Otherwise
  // the alignment is capped at the number of bytes accessed (NumVecs elements),
  // dropped to 0 when it is below both 8 and that byte count, reduced to its
  // lowest set bit, and finally an alignment of 1 is also encoded as 0.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
    unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the vector type to an index into DOpcodes (64-bit vector types) or
  // QOpcodes (the remaining types).
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: loads produce one wide vector of i64 elements (NumVecs == 3
  // is rounded up to 4 elements, and the count is doubled when the vectors are
  // not 64 bits wide), updating forms add an i32 result, and every form ends
  // with the chain.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    // A constant increment is passed as register 0; any other increment is
    // passed through unchanged.
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    Ops.push_back(isa<ConstantSDNode>(Inc.getNode()) ? Reg0 : Inc);
  }

  // Combine the input vectors into a single super-register value. With three
  // vectors the fourth slot is filled with an IMPLICIT_DEF.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  cast<MachineSDNode>(VLdLn)->setMemRefs(MemOp, MemOp + 1);
  // Stores have no vector results to unpack, so the node is replaced directly.
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Result NumVecs of N maps to result 1 of the new node and, for updating
  // forms, result NumVecs + 1 maps to result 2.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
   2213 
   2214 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
   2215                                    const uint16_t *Opcodes) {
   2216   assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
   2217   SDLoc dl(N);
   2218 
   2219   SDValue MemAddr, Align;
   2220   if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align))
   2221     return;
   2222 
   2223   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2224   MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   2225 
   2226   SDValue Chain = N->getOperand(0);
   2227   EVT VT = N->getValueType(0);
   2228 
   2229   unsigned Alignment = 0;
   2230   if (NumVecs != 3) {
   2231     Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
   2232     unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
   2233     if (Alignment > NumBytes)
   2234       Alignment = NumBytes;
   2235     if (Alignment < 8 && Alignment < NumBytes)
   2236       Alignment = 0;
   2237     // Alignment must be a power of two; make sure of that.
   2238     Alignment = (Alignment & -Alignment);
   2239     if (Alignment == 1)
   2240       Alignment = 0;
   2241   }
   2242   Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
   2243 
   2244   unsigned OpcodeIndex;
   2245   switch (VT.getSimpleVT().SimpleTy) {
   2246   default: llvm_unreachable("unhandled vld-dup type");
   2247   case MVT::v8i8:  OpcodeIndex = 0; break;
   2248   case MVT::v4i16: OpcodeIndex = 1; break;
   2249   case MVT::v2f32:
   2250   case MVT::v2i32: OpcodeIndex = 2; break;
   2251   }
   2252 
   2253   SDValue Pred = getAL(CurDAG, dl);
   2254   SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2255   SDValue SuperReg;
   2256   unsigned Opc = Opcodes[OpcodeIndex];
   2257   SmallVector<SDValue, 6> Ops;
   2258   Ops.push_back(MemAddr);
   2259   Ops.push_back(Align);
   2260   if (isUpdating) {
   2261     // fixed-stride update instructions don't have an explicit writeback
   2262     // operand. It's implicit in the opcode itself.
   2263     SDValue Inc = N->getOperand(2);
   2264     if (!isa<ConstantSDNode>(Inc.getNode()))
   2265       Ops.push_back(Inc);
   2266     // FIXME: VLD3 and VLD4 haven't been updated to that form yet.
   2267     else if (NumVecs > 2)
   2268       Ops.push_back(Reg0);
   2269   }
   2270   Ops.push_back(Pred);
   2271   Ops.push_back(Reg0);
   2272   Ops.push_back(Chain);
   2273 
   2274   unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
   2275   std::vector<EVT> ResTys;
   2276   ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), MVT::i64,ResTyElts));
   2277   if (isUpdating)
   2278     ResTys.push_back(MVT::i32);
   2279   ResTys.push_back(MVT::Other);
   2280   SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   2281   cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
   2282   SuperReg = SDValue(VLdDup, 0);
   2283 
   2284   // Extract the subregisters.
   2285   static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
   2286   unsigned SubIdx = ARM::dsub_0;
   2287   for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
   2288     ReplaceUses(SDValue(N, Vec),
   2289                 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
   2290   ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
   2291   if (isUpdating)
   2292     ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
   2293   CurDAG->RemoveDeadNode(N);
   2294 }
   2295 
   2296 void ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs,
   2297                                  unsigned Opc) {
   2298   assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range");
   2299   SDLoc dl(N);
   2300   EVT VT = N->getValueType(0);
   2301   unsigned FirstTblReg = IsExt ? 2 : 1;
   2302 
   2303   // Form a REG_SEQUENCE to force register allocation.
   2304   SDValue RegSeq;
   2305   SDValue V0 = N->getOperand(FirstTblReg + 0);
   2306   SDValue V1 = N->getOperand(FirstTblReg + 1);
   2307   if (NumVecs == 2)
   2308     RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
   2309   else {
   2310     SDValue V2 = N->getOperand(FirstTblReg + 2);
   2311     // If it's a vtbl3, form a quad D-register and leave the last part as
   2312     // an undef.
   2313     SDValue V3 = (NumVecs == 3)
   2314       ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
   2315       : N->getOperand(FirstTblReg + 3);
   2316     RegSeq = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
   2317   }
   2318 
   2319   SmallVector<SDValue, 6> Ops;
   2320   if (IsExt)
   2321     Ops.push_back(N->getOperand(1));
   2322   Ops.push_back(RegSeq);
   2323   Ops.push_back(N->getOperand(FirstTblReg + NumVecs));
   2324   Ops.push_back(getAL(CurDAG, dl)); // predicate
   2325   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // predicate register
   2326   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
   2327 }
   2328 
/// Try to select N as a bitfield extract (SBFX/UBFX, or their t2 variants on
/// Thumb), or as a plain right shift when the extracted field reaches the top
/// bit of the value.
///
/// The shapes recognised, in the order they are tried:
///   - (and (srl X, c), mask) where mask covers only low bits;
///   - a shift N whose operand 0 is (shl X, c1) and whose amount is constant;
///   - a shift N whose operand 0 is (and X, shifted-mask) and whose constant
///     amount equals the position of the mask's lowest set bit;
///   - SIGN_EXTEND_INREG of (srl X, c) or (sra X, c).
///
/// \param N        Node to select.
/// \param isSigned Chooses the SBFX opcodes instead of the UBFX opcodes.
/// \returns true if N was selected; false otherwise, including when the
///          subtarget does not have V6T2 operations.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Note: The width operand is encoded as width-1.
        unsigned Width = countTrailingOnes(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        // The field runs up to the most significant bit of the value, so the
        // mask is redundant after the shift.
        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        // General case: extract Width+1 bits starting at bit LSB of the
        // value that feeds the SRL.
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    // An AND node that did not match above is never handled by the patterns
    // below.
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      int LSB = Srl_imm - Shl_imm;
      // A left shift larger than the right shift gives a negative start bit,
      // which is not treated as a bitfield extract.
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = countTrailingZeros(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Index of the highest set bit of the mask.
      unsigned MSB = 31 - countLeadingZeros(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Sign extension of a right-shifted value: the width comes from the type
  // operand of the SIGN_EXTEND_INREG and the start bit from the shift amount.
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    // The field must lie entirely within the 32-bit value.
    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    // Here Width is the real width, so it is passed as Width - 1.
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
   2455 
   2456 /// Target-specific DAG combining for ISD::XOR.
   2457 /// Target-independent combining lowers SELECT_CC nodes of the form
   2458 /// select_cc setg[ge] X,  0,  X, -X
   2459 /// select_cc setgt    X, -1,  X, -X
   2460 /// select_cc setl[te] X,  0, -X,  X
   2461 /// select_cc setlt    X,  1, -X,  X
   2462 /// which represent Integer ABS into:
   2463 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
   2464 /// ARM instruction selection detects the latter and matches it to
   2465 /// ARM::ABS or ARM::t2ABS machine node.
   2466 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
   2467   SDValue XORSrc0 = N->getOperand(0);
   2468   SDValue XORSrc1 = N->getOperand(1);
   2469   EVT VT = N->getValueType(0);
   2470 
   2471   if (Subtarget->isThumb1Only())
   2472     return false;
   2473 
   2474   if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
   2475     return false;
   2476 
   2477   SDValue ADDSrc0 = XORSrc0.getOperand(0);
   2478   SDValue ADDSrc1 = XORSrc0.getOperand(1);
   2479   SDValue SRASrc0 = XORSrc1.getOperand(0);
   2480   SDValue SRASrc1 = XORSrc1.getOperand(1);
   2481   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
   2482   EVT XType = SRASrc0.getValueType();
   2483   unsigned Size = XType.getSizeInBits() - 1;
   2484 
   2485   if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
   2486       XType.isInteger() && SRAConstant != nullptr &&
   2487       Size == SRAConstant->getZExtValue()) {
   2488     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
   2489     CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
   2490     return true;
   2491   }
   2492 
   2493   return false;
   2494 }
   2495 
   2496 static bool SearchSignedMulShort(SDValue SignExt, unsigned *Opc, SDValue &Src1,
   2497                                  bool Accumulate) {
   2498   // For SM*WB, we need to some form of sext.
   2499   // For SM*WT, we need to search for (sra X, 16)
   2500   // Src1 then gets set to X.
   2501   if ((SignExt.getOpcode() == ISD::SIGN_EXTEND ||
   2502        SignExt.getOpcode() == ISD::SIGN_EXTEND_INREG ||
   2503        SignExt.getOpcode() == ISD::AssertSext) &&
   2504        SignExt.getValueType() == MVT::i32) {
   2505 
   2506     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
   2507     Src1 = SignExt.getOperand(0);
   2508     return true;
   2509   }
   2510 
   2511   if (SignExt.getOpcode() != ISD::SRA)
   2512     return false;
   2513 
   2514   ConstantSDNode *SRASrc1 = dyn_cast<ConstantSDNode>(SignExt.getOperand(1));
   2515   if (!SRASrc1 || SRASrc1->getZExtValue() != 16)
   2516     return false;
   2517 
   2518   SDValue Op0 = SignExt.getOperand(0);
   2519 
   2520   // The sign extend operand for SM*WB could be generated by a shl and ashr.
   2521   if (Op0.getOpcode() == ISD::SHL) {
   2522     SDValue SHL = Op0;
   2523     ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
   2524     if (!SHLSrc1 || SHLSrc1->getZExtValue() != 16)
   2525       return false;
   2526 
   2527     *Opc = Accumulate ? ARM::SMLAWB : ARM::SMULWB;
   2528     Src1 = Op0.getOperand(0);
   2529     return true;
   2530   }
   2531   *Opc = Accumulate ? ARM::SMLAWT : ARM::SMULWT;
   2532   Src1 = SignExt.getOperand(0);
   2533   return true;
   2534 }
   2535 
   2536 static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
   2537                                 SDValue &Src1, bool Accumulate) {
   2538   // First we look for:
   2539   // (add (or (srl ?, 16), (shl ?, 16)))
   2540   if (OR.getOpcode() != ISD::OR)
   2541     return false;
   2542 
   2543   SDValue SRL = OR.getOperand(0);
   2544   SDValue SHL = OR.getOperand(1);
   2545 
   2546   if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL) {
   2547     SRL = OR.getOperand(1);
   2548     SHL = OR.getOperand(0);
   2549     if (SRL.getOpcode() != ISD::SRL || SHL.getOpcode() != ISD::SHL)
   2550       return false;
   2551   }
   2552 
   2553   ConstantSDNode *SRLSrc1 = dyn_cast<ConstantSDNode>(SRL.getOperand(1));
   2554   ConstantSDNode *SHLSrc1 = dyn_cast<ConstantSDNode>(SHL.getOperand(1));
   2555   if (!SRLSrc1 || !SHLSrc1 || SRLSrc1->getZExtValue() != 16 ||
   2556       SHLSrc1->getZExtValue() != 16)
   2557     return false;
   2558 
   2559   // The first operands to the shifts need to be the two results from the
   2560   // same smul_lohi node.
   2561   if ((SRL.getOperand(0).getNode() != SHL.getOperand(0).getNode()) ||
   2562        SRL.getOperand(0).getOpcode() != ISD::SMUL_LOHI)
   2563     return false;
   2564 
   2565   SDNode *SMULLOHI = SRL.getOperand(0).getNode();
   2566   if (SRL.getOperand(0) != SDValue(SMULLOHI, 0) ||
   2567       SHL.getOperand(0) != SDValue(SMULLOHI, 1))
   2568     return false;
   2569 
   2570   // Now we have:
   2571   // (add (or (srl (smul_lohi ?, ?), 16), (shl (smul_lohi ?, ?), 16)))
   2572   // For SMLAW[B|T] smul_lohi will take a 32-bit and a 16-bit arguments.
   2573   // For SMLAWB the 16-bit value will signed extended somehow.
   2574   // For SMLAWT only the SRA is required.
   2575 
   2576   // Check both sides of SMUL_LOHI
   2577   if (SearchSignedMulShort(SMULLOHI->getOperand(0), Opc, Src1, Accumulate)) {
   2578     Src0 = SMULLOHI->getOperand(1);
   2579   } else if (SearchSignedMulShort(SMULLOHI->getOperand(1), Opc, Src1,
   2580                                   Accumulate)) {
   2581     Src0 = SMULLOHI->getOperand(0);
   2582   } else {
   2583     return false;
   2584   }
   2585   return true;
   2586 }
   2587 
   2588 bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
   2589   SDLoc dl(N);
   2590   SDValue Src0 = N->getOperand(0);
   2591   SDValue Src1 = N->getOperand(1);
   2592   SDValue A, B;
   2593   unsigned Opc = 0;
   2594 
   2595   if (N->getOpcode() == ISD::ADD) {
   2596     if (Src0.getOpcode() != ISD::OR && Src1.getOpcode() != ISD::OR)
   2597       return false;
   2598 
   2599     SDValue Acc;
   2600     if (SearchSignedMulLong(Src0, &Opc, A, B, true)) {
   2601       Acc = Src1;
   2602     } else if (SearchSignedMulLong(Src1, &Opc, A, B, true)) {
   2603       Acc = Src0;
   2604     } else {
   2605       return false;
   2606     }
   2607     if (Opc == 0)
   2608       return false;
   2609 
   2610     SDValue Ops[] = { A, B, Acc, getAL(CurDAG, dl),
   2611                       CurDAG->getRegister(0, MVT::i32) };
   2612     CurDAG->SelectNodeTo(N, Opc, MVT::i32, MVT::Other, Ops);
   2613     return true;
   2614   } else if (N->getOpcode() == ISD::OR &&
   2615              SearchSignedMulLong(SDValue(N, 0), &Opc, A, B, false)) {
   2616     if (Opc == 0)
   2617       return false;
   2618 
   2619     SDValue Ops[] = { A, B, getAL(CurDAG, dl),
   2620                       CurDAG->getRegister(0, MVT::i32)};
   2621     CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
   2622     return true;
   2623   }
   2624   return false;
   2625 }
   2626 
   2627 /// We've got special pseudo-instructions for these
   2628 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   2629   unsigned Opcode;
   2630   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
   2631   if (MemTy == MVT::i8)
   2632     Opcode = ARM::CMP_SWAP_8;
   2633   else if (MemTy == MVT::i16)
   2634     Opcode = ARM::CMP_SWAP_16;
   2635   else if (MemTy == MVT::i32)
   2636     Opcode = ARM::CMP_SWAP_32;
   2637   else
   2638     llvm_unreachable("Unknown AtomicCmpSwap type");
   2639 
   2640   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
   2641                    N->getOperand(0)};
   2642   SDNode *CmpSwap = CurDAG->getMachineNode(
   2643       Opcode, SDLoc(N),
   2644       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
   2645 
   2646   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   2647   MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
   2648   cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
   2649 
   2650   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
   2651   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
   2652   CurDAG->RemoveDeadNode(N);
   2653 }
   2654 
   2655 void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
   2656   // The only time a CONCAT_VECTORS operation can have legal types is when
   2657   // two 64-bit vectors are concatenated to a 128-bit vector.
   2658   EVT VT = N->getValueType(0);
   2659   if (!VT.is128BitVector() || N->getNumOperands() != 2)
   2660     llvm_unreachable("unexpected CONCAT_VECTORS");
   2661   ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
   2662 }
   2663 
   2664 void ARMDAGToDAGISel::Select(SDNode *N) {
   2665   SDLoc dl(N);
   2666 
   2667   if (N->isMachineOpcode()) {
   2668     N->setNodeId(-1);
   2669     return;   // Already selected.
   2670   }
   2671 
   2672   switch (N->getOpcode()) {
   2673   default: break;
   2674   case ISD::ADD:
   2675   case ISD::OR:
   2676     if (trySMLAWSMULW(N))
   2677       return;
   2678     break;
   2679   case ISD::WRITE_REGISTER:
   2680     if (tryWriteRegister(N))
   2681       return;
   2682     break;
   2683   case ISD::READ_REGISTER:
   2684     if (tryReadRegister(N))
   2685       return;
   2686     break;
   2687   case ISD::INLINEASM:
   2688     if (tryInlineAsm(N))
   2689       return;
   2690     break;
   2691   case ISD::XOR:
   2692     // Select special operations if XOR node forms integer ABS pattern
   2693     if (tryABSOp(N))
   2694       return;
   2695     // Other cases are autogenerated.
   2696     break;
   2697   case ISD::Constant: {
   2698     unsigned Val = cast<ConstantSDNode>(N)->getZExtValue();
   2699     // If we can't materialize the constant we need to use a literal pool
   2700     if (ConstantMaterializationCost(Val) > 2) {
   2701       SDValue CPIdx = CurDAG->getTargetConstantPool(
   2702           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
   2703           TLI->getPointerTy(CurDAG->getDataLayout()));
   2704 
   2705       SDNode *ResNode;
   2706       if (Subtarget->isThumb()) {
   2707         SDValue Pred = getAL(CurDAG, dl);
   2708         SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   2709         SDValue Ops[] = { CPIdx, Pred, PredReg, CurDAG->getEntryNode() };
   2710         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
   2711                                          Ops);
   2712       } else {
   2713         SDValue Ops[] = {
   2714           CPIdx,
   2715           CurDAG->getTargetConstant(0, dl, MVT::i32),
   2716           getAL(CurDAG, dl),
   2717           CurDAG->getRegister(0, MVT::i32),
   2718           CurDAG->getEntryNode()
   2719         };
   2720         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
   2721                                          Ops);
   2722       }
   2723       ReplaceNode(N, ResNode);
   2724       return;
   2725     }
   2726 
   2727     // Other cases are autogenerated.
   2728     break;
   2729   }
   2730   case ISD::FrameIndex: {
   2731     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
   2732     int FI = cast<FrameIndexSDNode>(N)->getIndex();
   2733     SDValue TFI = CurDAG->getTargetFrameIndex(
   2734         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   2735     if (Subtarget->isThumb1Only()) {
   2736       // Set the alignment of the frame object to 4, to avoid having to generate
   2737       // more than one ADD
   2738       MachineFrameInfo *MFI = MF->getFrameInfo();
   2739       if (MFI->getObjectAlignment(FI) < 4)
   2740         MFI->setObjectAlignment(FI, 4);
   2741       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
   2742                            CurDAG->getTargetConstant(0, dl, MVT::i32));
   2743       return;
   2744     } else {
   2745       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
   2746                       ARM::t2ADDri : ARM::ADDri);
   2747       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
   2748                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
   2749                         CurDAG->getRegister(0, MVT::i32) };
   2750       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
   2751       return;
   2752     }
   2753   }
   2754   case ISD::SRL:
   2755     if (tryV6T2BitfieldExtractOp(N, false))
   2756       return;
   2757     break;
   2758   case ISD::SIGN_EXTEND_INREG:
   2759   case ISD::SRA:
   2760     if (tryV6T2BitfieldExtractOp(N, true))
   2761       return;
   2762     break;
   2763   case ISD::MUL:
   2764     if (Subtarget->isThumb1Only())
   2765       break;
   2766     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
   2767       unsigned RHSV = C->getZExtValue();
   2768       if (!RHSV) break;
   2769       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
   2770         unsigned ShImm = Log2_32(RHSV-1);
   2771         if (ShImm >= 32)
   2772           break;
   2773         SDValue V = N->getOperand(0);
   2774         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
   2775         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
   2776         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2777         if (Subtarget->isThumb()) {
   2778           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
   2779           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
   2780           return;
   2781         } else {
   2782           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
   2783                             Reg0 };
   2784           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
   2785           return;
   2786         }
   2787       }
   2788       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
   2789         unsigned ShImm = Log2_32(RHSV+1);
   2790         if (ShImm >= 32)
   2791           break;
   2792         SDValue V = N->getOperand(0);
   2793         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
   2794         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
   2795         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
   2796         if (Subtarget->isThumb()) {
   2797           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
   2798           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
   2799           return;
   2800         } else {
   2801           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
   2802                             Reg0 };
   2803           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
   2804           return;
   2805         }
   2806       }
   2807     }
   2808     break;
   2809   case ISD::AND: {
   2810     // Check for unsigned bitfield extract
   2811     if (tryV6T2BitfieldExtractOp(N, false))
   2812       return;
   2813 
   2814     // If an immediate is used in an AND node, it is possible that the immediate
   2815     // can be more optimally materialized when negated. If this is the case we
   2816     // can negate the immediate and use a BIC instead.
   2817     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
   2818     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
   2819       uint32_t Imm = (uint32_t) N1C->getZExtValue();
   2820 
   2821       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
   2822       // immediate can be negated and fit in the immediate operand of
   2823       // a t2BIC, don't do any manual transform here as this can be
   2824       // handled by the generic ISel machinery.
   2825       bool PreferImmediateEncoding =
   2826         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
   2827       if (!PreferImmediateEncoding &&
   2828           ConstantMaterializationCost(Imm) >
   2829               ConstantMaterializationCost(~Imm)) {
   2830         // The current immediate costs more to materialize than a negated
   2831         // immediate, so negate the immediate and use a BIC.
   2832         SDValue NewImm =
   2833           CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);
   2834         // If the new constant didn't exist before, reposition it in the topological
   2835         // ordering so it is just before N. Otherwise, don't touch its location.
   2836         if (NewImm->getNodeId() == -1)
   2837           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
   2838 
   2839         if (!Subtarget->hasThumb2()) {
   2840           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
   2841                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
   2842                            CurDAG->getRegister(0, MVT::i32)};
   2843           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
   2844           return;
   2845         } else {
   2846           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
   2847                            CurDAG->getRegister(0, MVT::i32),
   2848                            CurDAG->getRegister(0, MVT::i32)};
   2849           ReplaceNode(N,
   2850                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
   2851           return;
   2852         }
   2853       }
   2854     }
   2855 
   2856     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
   2857     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
   2858     // are entirely contributed by c2 and lower 16-bits are entirely contributed
   2859     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
   2860     // Select it to: "movt x, ((c1 & 0xffff0000) >> 16)"
   2861     EVT VT = N->getValueType(0);
   2862     if (VT != MVT::i32)
   2863       break;
   2864     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
   2865       ? ARM::t2MOVTi16
   2866       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
   2867     if (!Opc)
   2868       break;
   2869     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   2870     N1C = dyn_cast<ConstantSDNode>(N1);
   2871     if (!N1C)
   2872       break;
   2873     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
   2874       SDValue N2 = N0.getOperand(1);
   2875       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
   2876       if (!N2C)
   2877         break;
   2878       unsigned N1CVal = N1C->getZExtValue();
   2879       unsigned N2CVal = N2C->getZExtValue();
   2880       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
   2881           (N1CVal & 0xffffU) == 0xffffU &&
   2882           (N2CVal & 0xffffU) == 0x0U) {
   2883         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
   2884                                                   dl, MVT::i32);
   2885         SDValue Ops[] = { N0.getOperand(0), Imm16,
   2886                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
   2887         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
   2888         return;
   2889       }
   2890     }
   2891     break;
   2892   }
   2893   case ARMISD::VMOVRRD:
   2894     ReplaceNode(N, CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
   2895                                           N->getOperand(0), getAL(CurDAG, dl),
   2896                                           CurDAG->getRegister(0, MVT::i32)));
   2897     return;
   2898   case ISD::UMUL_LOHI: {
   2899     if (Subtarget->isThumb1Only())
   2900       break;
   2901     if (Subtarget->isThumb()) {
   2902       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2903                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
   2904       ReplaceNode(
   2905           N, CurDAG->getMachineNode(ARM::t2UMULL, dl, MVT::i32, MVT::i32, Ops));
   2906       return;
   2907     } else {
   2908       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2909                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
   2910                         CurDAG->getRegister(0, MVT::i32) };
   2911       ReplaceNode(N, CurDAG->getMachineNode(
   2912                          Subtarget->hasV6Ops() ? ARM::UMULL : ARM::UMULLv5, dl,
   2913                          MVT::i32, MVT::i32, Ops));
   2914       return;
   2915     }
   2916   }
   2917   case ISD::SMUL_LOHI: {
   2918     if (Subtarget->isThumb1Only())
   2919       break;
   2920     if (Subtarget->isThumb()) {
   2921       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2922                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
   2923       ReplaceNode(
   2924           N, CurDAG->getMachineNode(ARM::t2SMULL, dl, MVT::i32, MVT::i32, Ops));
   2925       return;
   2926     } else {
   2927       SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2928                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
   2929                         CurDAG->getRegister(0, MVT::i32) };
   2930       ReplaceNode(N, CurDAG->getMachineNode(
   2931                          Subtarget->hasV6Ops() ? ARM::SMULL : ARM::SMULLv5, dl,
   2932                          MVT::i32, MVT::i32, Ops));
   2933       return;
   2934     }
   2935   }
   2936   case ARMISD::UMAAL: {
   2937     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
   2938     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2939                       N->getOperand(2), N->getOperand(3),
   2940                       getAL(CurDAG, dl),
   2941                       CurDAG->getRegister(0, MVT::i32) };
   2942     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
   2943     return;
   2944   }
   2945   case ARMISD::UMLAL:{
   2946     // UMAAL is similar to UMLAL but it adds two 32-bit values to the
   2947     // 64-bit multiplication result.
   2948     if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
   2949         N->getOperand(3).getOpcode() == ARMISD::ADDE) {
   2950 
   2951       SDValue Addc = N->getOperand(2);
   2952       SDValue Adde = N->getOperand(3);
   2953 
   2954       if (Adde.getOperand(2).getNode() == Addc.getNode()) {
   2955 
   2956         ConstantSDNode *Op0 = dyn_cast<ConstantSDNode>(Adde.getOperand(0));
   2957         ConstantSDNode *Op1 = dyn_cast<ConstantSDNode>(Adde.getOperand(1));
   2958 
   2959         if (Op0 && Op1 && Op0->getZExtValue() == 0 && Op1->getZExtValue() == 0)
   2960         {
   2961           // Select UMAAL instead: UMAAL RdLo, RdHi, Rn, Rm
   2962           // RdLo = one operand to be added, lower 32-bits of res
   2963           // RdHi = other operand to be added, upper 32-bits of res
   2964           // Rn = first multiply operand
   2965           // Rm = second multiply operand
   2966           SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
   2967                             Addc.getOperand(0), Addc.getOperand(1),
   2968                             getAL(CurDAG, dl),
   2969                             CurDAG->getRegister(0, MVT::i32) };
   2970           unsigned opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
   2971           CurDAG->SelectNodeTo(N, opc, MVT::i32, MVT::i32, Ops);
   2972           return;
   2973         }
   2974       }
   2975     }
   2976 
   2977     if (Subtarget->isThumb()) {
   2978       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   2979                         N->getOperand(3), getAL(CurDAG, dl),
   2980                         CurDAG->getRegister(0, MVT::i32)};
   2981       ReplaceNode(
   2982           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
   2983       return;
   2984     }else{
   2985       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   2986                         N->getOperand(3), getAL(CurDAG, dl),
   2987                         CurDAG->getRegister(0, MVT::i32),
   2988                         CurDAG->getRegister(0, MVT::i32) };
   2989       ReplaceNode(N, CurDAG->getMachineNode(
   2990                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
   2991                          MVT::i32, MVT::i32, Ops));
   2992       return;
   2993     }
   2994   }
   2995   case ARMISD::SMLAL:{
   2996     if (Subtarget->isThumb()) {
   2997       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   2998                         N->getOperand(3), getAL(CurDAG, dl),
   2999                         CurDAG->getRegister(0, MVT::i32)};
   3000       ReplaceNode(
   3001           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
   3002       return;
   3003     }else{
   3004       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
   3005                         N->getOperand(3), getAL(CurDAG, dl),
   3006                         CurDAG->getRegister(0, MVT::i32),
   3007                         CurDAG->getRegister(0, MVT::i32) };
   3008       ReplaceNode(N, CurDAG->getMachineNode(
   3009                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
   3010                          MVT::i32, MVT::i32, Ops));
   3011       return;
   3012     }
   3013   }
   3014   case ISD::LOAD: {
   3015     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
   3016       if (tryT2IndexedLoad(N))
   3017         return;
   3018     } else if (tryARMIndexedLoad(N))
   3019       return;
   3020     // Other cases are autogenerated.
   3021     break;
   3022   }
   3023   case ARMISD::BRCOND: {
   3024     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
   3025     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
   3026     // Pattern complexity = 6  cost = 1  size = 0
   3027 
   3028     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
   3029     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
   3030     // Pattern complexity = 6  cost = 1  size = 0
   3031 
   3032     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
   3033     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
   3034     // Pattern complexity = 6  cost = 1  size = 0
   3035 
   3036     unsigned Opc = Subtarget->isThumb() ?
   3037       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
   3038     SDValue Chain = N->getOperand(0);
   3039     SDValue N1 = N->getOperand(1);
   3040     SDValue N2 = N->getOperand(2);
   3041     SDValue N3 = N->getOperand(3);
   3042     SDValue InFlag = N->getOperand(4);
   3043     assert(N1.getOpcode() == ISD::BasicBlock);
   3044     assert(N2.getOpcode() == ISD::Constant);
   3045     assert(N3.getOpcode() == ISD::Register);
   3046 
   3047     SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
   3048                                cast<ConstantSDNode>(N2)->getZExtValue()), dl,
   3049                                MVT::i32);
   3050     SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
   3051     SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
   3052                                              MVT::Glue, Ops);
   3053     Chain = SDValue(ResNode, 0);
   3054     if (N->getNumValues() == 2) {
   3055       InFlag = SDValue(ResNode, 1);
   3056       ReplaceUses(SDValue(N, 1), InFlag);
   3057     }
   3058     ReplaceUses(SDValue(N, 0),
   3059                 SDValue(Chain.getNode(), Chain.getResNo()));
   3060     CurDAG->RemoveDeadNode(N);
   3061     return;
   3062   }
   3063   case ARMISD::VZIP: {
   3064     unsigned Opc = 0;
   3065     EVT VT = N->getValueType(0);
   3066     switch (VT.getSimpleVT().SimpleTy) {
   3067     default: return;
   3068     case MVT::v8i8:  Opc = ARM::VZIPd8; break;
   3069     case MVT::v4i16: Opc = ARM::VZIPd16; break;
   3070     case MVT::v2f32:
   3071     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
   3072     case MVT::v2i32: Opc = ARM::VTRNd32; break;
   3073     case MVT::v16i8: Opc = ARM::VZIPq8; break;
   3074     case MVT::v8i16: Opc = ARM::VZIPq16; break;
   3075     case MVT::v4f32:
   3076     case MVT::v4i32: Opc = ARM::VZIPq32; break;
   3077     }
   3078     SDValue Pred = getAL(CurDAG, dl);
   3079     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   3080     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
   3081     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
   3082     return;
   3083   }
   3084   case ARMISD::VUZP: {
   3085     unsigned Opc = 0;
   3086     EVT VT = N->getValueType(0);
   3087     switch (VT.getSimpleVT().SimpleTy) {
   3088     default: return;
   3089     case MVT::v8i8:  Opc = ARM::VUZPd8; break;
   3090     case MVT::v4i16: Opc = ARM::VUZPd16; break;
   3091     case MVT::v2f32:
   3092     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
   3093     case MVT::v2i32: Opc = ARM::VTRNd32; break;
   3094     case MVT::v16i8: Opc = ARM::VUZPq8; break;
   3095     case MVT::v8i16: Opc = ARM::VUZPq16; break;
   3096     case MVT::v4f32:
   3097     case MVT::v4i32: Opc = ARM::VUZPq32; break;
   3098     }
   3099     SDValue Pred = getAL(CurDAG, dl);
   3100     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   3101     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
   3102     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
   3103     return;
   3104   }
   3105   case ARMISD::VTRN: {
   3106     unsigned Opc = 0;
   3107     EVT VT = N->getValueType(0);
   3108     switch (VT.getSimpleVT().SimpleTy) {
   3109     default: return;
   3110     case MVT::v8i8:  Opc = ARM::VTRNd8; break;
   3111     case MVT::v4i16: Opc = ARM::VTRNd16; break;
   3112     case MVT::v2f32:
   3113     case MVT::v2i32: Opc = ARM::VTRNd32; break;
   3114     case MVT::v16i8: Opc = ARM::VTRNq8; break;
   3115     case MVT::v8i16: Opc = ARM::VTRNq16; break;
   3116     case MVT::v4f32:
   3117     case MVT::v4i32: Opc = ARM::VTRNq32; break;
   3118     }
   3119     SDValue Pred = getAL(CurDAG, dl);
   3120     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
   3121     SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg };
   3122     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
   3123     return;
   3124   }
   3125   case ARMISD::BUILD_VECTOR: {
   3126     EVT VecVT = N->getValueType(0);
   3127     EVT EltVT = VecVT.getVectorElementType();
   3128     unsigned NumElts = VecVT.getVectorNumElements();
   3129     if (EltVT == MVT::f64) {
   3130       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
   3131       ReplaceNode(
   3132           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
   3133       return;
   3134     }
   3135     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
   3136     if (NumElts == 2) {
   3137       ReplaceNode(
   3138           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
   3139       return;
   3140     }
   3141     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
   3142     ReplaceNode(N,
   3143                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
   3144                                     N->getOperand(2), N->getOperand(3)));
   3145     return;
   3146   }
   3147 
   3148   case ARMISD::VLD2DUP: {
   3149     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
   3150                                         ARM::VLD2DUPd32 };
   3151     SelectVLDDup(N, false, 2, Opcodes);
   3152     return;
   3153   }
   3154 
   3155   case ARMISD::VLD3DUP: {
   3156     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
   3157                                         ARM::VLD3DUPd16Pseudo,
   3158                                         ARM::VLD3DUPd32Pseudo };
   3159     SelectVLDDup(N, false, 3, Opcodes);
   3160     return;
   3161   }
   3162 
   3163   case ARMISD::VLD4DUP: {
   3164     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
   3165                                         ARM::VLD4DUPd16Pseudo,
   3166                                         ARM::VLD4DUPd32Pseudo };
   3167     SelectVLDDup(N, false, 4, Opcodes);
   3168     return;
   3169   }
   3170 
   3171   case ARMISD::VLD2DUP_UPD: {
   3172     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
   3173                                         ARM::VLD2DUPd16wb_fixed,
   3174                                         ARM::VLD2DUPd32wb_fixed };
   3175     SelectVLDDup(N, true, 2, Opcodes);
   3176     return;
   3177   }
   3178 
   3179   case ARMISD::VLD3DUP_UPD: {
   3180     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
   3181                                         ARM::VLD3DUPd16Pseudo_UPD,
   3182                                         ARM::VLD3DUPd32Pseudo_UPD };
   3183     SelectVLDDup(N, true, 3, Opcodes);
   3184     return;
   3185   }
   3186 
   3187   case ARMISD::VLD4DUP_UPD: {
   3188     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
   3189                                         ARM::VLD4DUPd16Pseudo_UPD,
   3190                                         ARM::VLD4DUPd32Pseudo_UPD };
   3191     SelectVLDDup(N, true, 4, Opcodes);
   3192     return;
   3193   }
   3194 
   3195   case ARMISD::VLD1_UPD: {
   3196     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
   3197                                          ARM::VLD1d16wb_fixed,
   3198                                          ARM::VLD1d32wb_fixed,
   3199                                          ARM::VLD1d64wb_fixed };
   3200     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
   3201                                          ARM::VLD1q16wb_fixed,
   3202                                          ARM::VLD1q32wb_fixed,
   3203                                          ARM::VLD1q64wb_fixed };
   3204     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
   3205     return;
   3206   }
   3207 
   3208   case ARMISD::VLD2_UPD: {
   3209     static const uint16_t DOpcodes[] = { ARM::VLD2d8wb_fixed,
   3210                                          ARM::VLD2d16wb_fixed,
   3211                                          ARM::VLD2d32wb_fixed,
   3212                                          ARM::VLD1q64wb_fixed};
   3213     static const uint16_t QOpcodes[] = { ARM::VLD2q8PseudoWB_fixed,
   3214                                          ARM::VLD2q16PseudoWB_fixed,
   3215                                          ARM::VLD2q32PseudoWB_fixed };
   3216     SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
   3217     return;
   3218   }
   3219 
   3220   case ARMISD::VLD3_UPD: {
   3221     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
   3222                                          ARM::VLD3d16Pseudo_UPD,
   3223                                          ARM::VLD3d32Pseudo_UPD,
   3224                                          ARM::VLD1d64TPseudoWB_fixed};
   3225     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
   3226                                           ARM::VLD3q16Pseudo_UPD,
   3227                                           ARM::VLD3q32Pseudo_UPD };
   3228     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
   3229                                           ARM::VLD3q16oddPseudo_UPD,
   3230                                           ARM::VLD3q32oddPseudo_UPD };
   3231     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
   3232     return;
   3233   }
   3234 
   3235   case ARMISD::VLD4_UPD: {
   3236     static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo_UPD,
   3237                                          ARM::VLD4d16Pseudo_UPD,
   3238                                          ARM::VLD4d32Pseudo_UPD,
   3239                                          ARM::VLD1d64QPseudoWB_fixed};
   3240     static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
   3241                                           ARM::VLD4q16Pseudo_UPD,
   3242                                           ARM::VLD4q32Pseudo_UPD };
   3243     static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo_UPD,
   3244                                           ARM::VLD4q16oddPseudo_UPD,
   3245                                           ARM::VLD4q32oddPseudo_UPD };
   3246     SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
   3247     return;
   3248   }
   3249 
   3250   case ARMISD::VLD2LN_UPD: {
   3251     static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
   3252                                          ARM::VLD2LNd16Pseudo_UPD,
   3253                                          ARM::VLD2LNd32Pseudo_UPD };
   3254     static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
   3255                                          ARM::VLD2LNq32Pseudo_UPD };
   3256     SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
   3257     return;
   3258   }
   3259 
   3260   case ARMISD::VLD3LN_UPD: {
   3261     static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
   3262                                          ARM::VLD3LNd16Pseudo_UPD,
   3263                                          ARM::VLD3LNd32Pseudo_UPD };
   3264     static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
   3265                                          ARM::VLD3LNq32Pseudo_UPD };
   3266     SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
   3267     return;
   3268   }
   3269 
   3270   case ARMISD::VLD4LN_UPD: {
   3271     static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
   3272                                          ARM::VLD4LNd16Pseudo_UPD,
   3273                                          ARM::VLD4LNd32Pseudo_UPD };
   3274     static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
   3275                                          ARM::VLD4LNq32Pseudo_UPD };
   3276     SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
   3277     return;
   3278   }
   3279 
   3280   case ARMISD::VST1_UPD: {
   3281     static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
   3282                                          ARM::VST1d16wb_fixed,
   3283                                          ARM::VST1d32wb_fixed,
   3284                                          ARM::VST1d64wb_fixed };
   3285     static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
   3286                                          ARM::VST1q16wb_fixed,
   3287                                          ARM::VST1q32wb_fixed,
   3288                                          ARM::VST1q64wb_fixed };
   3289     SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);
   3290     return;
   3291   }
   3292 
   3293   case ARMISD::VST2_UPD: {
   3294     static const uint16_t DOpcodes[] = { ARM::VST2d8wb_fixed,
   3295                                          ARM::VST2d16wb_fixed,
   3296                                          ARM::VST2d32wb_fixed,
   3297                                          ARM::VST1q64wb_fixed};
   3298     static const uint16_t QOpcodes[] = { ARM::VST2q8PseudoWB_fixed,
   3299                                          ARM::VST2q16PseudoWB_fixed,
   3300                                          ARM::VST2q32PseudoWB_fixed };
   3301     SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);
   3302     return;
   3303   }
   3304 
   3305   case ARMISD::VST3_UPD: {
   3306     static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
   3307                                          ARM::VST3d16Pseudo_UPD,
   3308                                          ARM::VST3d32Pseudo_UPD,
   3309                                          ARM::VST1d64TPseudoWB_fixed};
   3310     static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
   3311                                           ARM::VST3q16Pseudo_UPD,
   3312                                           ARM::VST3q32Pseudo_UPD };
   3313     static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
   3314                                           ARM::VST3q16oddPseudo_UPD,
   3315                                           ARM::VST3q32oddPseudo_UPD };
   3316     SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
   3317     return;
   3318   }
   3319 
   3320   case ARMISD::VST4_UPD: {
   3321     static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo_UPD,
   3322                                          ARM::VST4d16Pseudo_UPD,
   3323                                          ARM::VST4d32Pseudo_UPD,
   3324                                          ARM::VST1d64QPseudoWB_fixed};
   3325     static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
   3326                                           ARM::VST4q16Pseudo_UPD,
   3327                                           ARM::VST4q32Pseudo_UPD };
   3328     static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo_UPD,
   3329                                           ARM::VST4q16oddPseudo_UPD,
   3330                                           ARM::VST4q32oddPseudo_UPD };
   3331     SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
   3332     return;
   3333   }
   3334 
   3335   case ARMISD::VST2LN_UPD: {
   3336     static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
   3337                                          ARM::VST2LNd16Pseudo_UPD,
   3338                                          ARM::VST2LNd32Pseudo_UPD };
   3339     static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
   3340                                          ARM::VST2LNq32Pseudo_UPD };
   3341     SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
   3342     return;
   3343   }
   3344 
   3345   case ARMISD::VST3LN_UPD: {
   3346     static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
   3347                                          ARM::VST3LNd16Pseudo_UPD,
   3348                                          ARM::VST3LNd32Pseudo_UPD };
   3349     static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
   3350                                          ARM::VST3LNq32Pseudo_UPD };
   3351     SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
   3352     return;
   3353   }
   3354 
   3355   case ARMISD::VST4LN_UPD: {
   3356     static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
   3357                                          ARM::VST4LNd16Pseudo_UPD,
   3358                                          ARM::VST4LNd32Pseudo_UPD };
   3359     static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
   3360                                          ARM::VST4LNq32Pseudo_UPD };
   3361     SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
   3362     return;
   3363   }
   3364 
   3365   case ISD::INTRINSIC_VOID:
   3366   case ISD::INTRINSIC_W_CHAIN: {
   3367     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
   3368     switch (IntNo) {
   3369     default:
   3370       break;
   3371 
   3372     case Intrinsic::arm_mrrc:
   3373     case Intrinsic::arm_mrrc2: {
   3374       SDLoc dl(N);
   3375       SDValue Chain = N->getOperand(0);
   3376       unsigned Opc;
   3377 
   3378       if (Subtarget->isThumb())
   3379         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
   3380       else
   3381         Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);
   3382 
   3383       SmallVector<SDValue, 5> Ops;
   3384       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */
   3385       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */
   3386       Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */
   3387 
   3388       // The ARM-mode mrrc2 instruction doesn't allow predicates: the top 4 bits of the encoded
   3389       // instruction are always '1111'. Assembly language may still specify AL as a predicate
   3390       // on mrrc2, but that makes no difference to the encoded instruction.
   3391       if (Opc != ARM::MRRC2) {
   3392         Ops.push_back(getAL(CurDAG, dl));
   3393         Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   3394       }
   3395 
   3396       Ops.push_back(Chain);
   3397 
   3398       // Writes to two registers.
   3399       const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};
   3400 
   3401       ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
   3402       return;
   3403     }
   3404     case Intrinsic::arm_ldaexd:
   3405     case Intrinsic::arm_ldrexd: {
   3406       SDLoc dl(N);
   3407       SDValue Chain = N->getOperand(0);
   3408       SDValue MemAddr = N->getOperand(2);
   3409       bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();
   3410 
   3411       bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
   3412       unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
   3413                                 : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);
   3414 
   3415       // arm_ldrexd returns an i64 value in {i32, i32}
   3416       std::vector<EVT> ResTys;
   3417       if (isThumb) {
   3418         ResTys.push_back(MVT::i32);
   3419         ResTys.push_back(MVT::i32);
   3420       } else
   3421         ResTys.push_back(MVT::Untyped);
   3422       ResTys.push_back(MVT::Other);
   3423 
   3424       // Place arguments in the right order.
   3425       SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
   3426                        CurDAG->getRegister(0, MVT::i32), Chain};
   3427       SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
   3428       // Transfer memoperands.
   3429       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   3430       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   3431       cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
   3432 
   3433       // Remap uses.
   3434       SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
   3435       if (!SDValue(N, 0).use_empty()) {
   3436         SDValue Result;
   3437         if (isThumb)
   3438           Result = SDValue(Ld, 0);
   3439         else {
   3440           SDValue SubRegIdx =
   3441             CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
   3442           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
   3443               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
   3444           Result = SDValue(ResNode,0);
   3445         }
   3446         ReplaceUses(SDValue(N, 0), Result);
   3447       }
   3448       if (!SDValue(N, 1).use_empty()) {
   3449         SDValue Result;
   3450         if (isThumb)
   3451           Result = SDValue(Ld, 1);
   3452         else {
   3453           SDValue SubRegIdx =
   3454             CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
   3455           SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
   3456               dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
   3457           Result = SDValue(ResNode,0);
   3458         }
   3459         ReplaceUses(SDValue(N, 1), Result);
   3460       }
   3461       ReplaceUses(SDValue(N, 2), OutChain);
   3462       CurDAG->RemoveDeadNode(N);
   3463       return;
   3464     }
   3465     case Intrinsic::arm_stlexd:
   3466     case Intrinsic::arm_strexd: {
   3467       SDLoc dl(N);
   3468       SDValue Chain = N->getOperand(0);
   3469       SDValue Val0 = N->getOperand(2);
   3470       SDValue Val1 = N->getOperand(3);
   3471       SDValue MemAddr = N->getOperand(4);
   3472 
   3473       // Store exclusive double return a i32 value which is the return status
   3474       // of the issued store.
   3475       const EVT ResTys[] = {MVT::i32, MVT::Other};
   3476 
   3477       bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
   3478       // Place arguments in the right order.
   3479       SmallVector<SDValue, 7> Ops;
   3480       if (isThumb) {
   3481         Ops.push_back(Val0);
   3482         Ops.push_back(Val1);
   3483       } else
   3484         // arm_strexd uses GPRPair.
   3485         Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
   3486       Ops.push_back(MemAddr);
   3487       Ops.push_back(getAL(CurDAG, dl));
   3488       Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   3489       Ops.push_back(Chain);
   3490 
   3491       bool IsRelease = IntNo == Intrinsic::arm_stlexd;
   3492       unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
   3493                                 : (IsRelease ? ARM::STLEXD : ARM::STREXD);
   3494 
   3495       SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
   3496       // Transfer memoperands.
   3497       MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   3498       MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   3499       cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
   3500 
   3501       ReplaceNode(N, St);
   3502       return;
   3503     }
   3504 
   3505     case Intrinsic::arm_neon_vld1: {
   3506       static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
   3507                                            ARM::VLD1d32, ARM::VLD1d64 };
   3508       static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
   3509                                            ARM::VLD1q32, ARM::VLD1q64};
   3510       SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
   3511       return;
   3512     }
   3513 
   3514     case Intrinsic::arm_neon_vld2: {
   3515       static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
   3516                                            ARM::VLD2d32, ARM::VLD1q64 };
   3517       static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
   3518                                            ARM::VLD2q32Pseudo };
   3519       SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
   3520       return;
   3521     }
   3522 
   3523     case Intrinsic::arm_neon_vld3: {
   3524       static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
   3525                                            ARM::VLD3d16Pseudo,
   3526                                            ARM::VLD3d32Pseudo,
   3527                                            ARM::VLD1d64TPseudo };
   3528       static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
   3529                                             ARM::VLD3q16Pseudo_UPD,
   3530                                             ARM::VLD3q32Pseudo_UPD };
   3531       static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
   3532                                             ARM::VLD3q16oddPseudo,
   3533                                             ARM::VLD3q32oddPseudo };
   3534       SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
   3535       return;
   3536     }
   3537 
   3538     case Intrinsic::arm_neon_vld4: {
   3539       static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
   3540                                            ARM::VLD4d16Pseudo,
   3541                                            ARM::VLD4d32Pseudo,
   3542                                            ARM::VLD1d64QPseudo };
   3543       static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
   3544                                             ARM::VLD4q16Pseudo_UPD,
   3545                                             ARM::VLD4q32Pseudo_UPD };
   3546       static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
   3547                                             ARM::VLD4q16oddPseudo,
   3548                                             ARM::VLD4q32oddPseudo };
   3549       SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
   3550       return;
   3551     }
   3552 
   3553     case Intrinsic::arm_neon_vld2lane: {
   3554       static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
   3555                                            ARM::VLD2LNd16Pseudo,
   3556                                            ARM::VLD2LNd32Pseudo };
   3557       static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
   3558                                            ARM::VLD2LNq32Pseudo };
   3559       SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
   3560       return;
   3561     }
   3562 
   3563     case Intrinsic::arm_neon_vld3lane: {
   3564       static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
   3565                                            ARM::VLD3LNd16Pseudo,
   3566                                            ARM::VLD3LNd32Pseudo };
   3567       static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
   3568                                            ARM::VLD3LNq32Pseudo };
   3569       SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
   3570       return;
   3571     }
   3572 
   3573     case Intrinsic::arm_neon_vld4lane: {
   3574       static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
   3575                                            ARM::VLD4LNd16Pseudo,
   3576                                            ARM::VLD4LNd32Pseudo };
   3577       static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
   3578                                            ARM::VLD4LNq32Pseudo };
   3579       SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
   3580       return;
   3581     }
   3582 
   3583     case Intrinsic::arm_neon_vst1: {
   3584       static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
   3585                                            ARM::VST1d32, ARM::VST1d64 };
   3586       static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
   3587                                            ARM::VST1q32, ARM::VST1q64 };
   3588       SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
   3589       return;
   3590     }
   3591 
   3592     case Intrinsic::arm_neon_vst2: {
   3593       static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
   3594                                            ARM::VST2d32, ARM::VST1q64 };
   3595       static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
   3596                                            ARM::VST2q32Pseudo };
   3597       SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
   3598       return;
   3599     }
   3600 
   3601     case Intrinsic::arm_neon_vst3: {
   3602       static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
   3603                                            ARM::VST3d16Pseudo,
   3604                                            ARM::VST3d32Pseudo,
   3605                                            ARM::VST1d64TPseudo };
   3606       static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
   3607                                             ARM::VST3q16Pseudo_UPD,
   3608                                             ARM::VST3q32Pseudo_UPD };
   3609       static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
   3610                                             ARM::VST3q16oddPseudo,
   3611                                             ARM::VST3q32oddPseudo };
   3612       SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
   3613       return;
   3614     }
   3615 
   3616     case Intrinsic::arm_neon_vst4: {
   3617       static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
   3618                                            ARM::VST4d16Pseudo,
   3619                                            ARM::VST4d32Pseudo,
   3620                                            ARM::VST1d64QPseudo };
   3621       static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
   3622                                             ARM::VST4q16Pseudo_UPD,
   3623                                             ARM::VST4q32Pseudo_UPD };
   3624       static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
   3625                                             ARM::VST4q16oddPseudo,
   3626                                             ARM::VST4q32oddPseudo };
   3627       SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
   3628       return;
   3629     }
   3630 
   3631     case Intrinsic::arm_neon_vst2lane: {
   3632       static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
   3633                                            ARM::VST2LNd16Pseudo,
   3634                                            ARM::VST2LNd32Pseudo };
   3635       static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
   3636                                            ARM::VST2LNq32Pseudo };
   3637       SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
   3638       return;
   3639     }
   3640 
   3641     case Intrinsic::arm_neon_vst3lane: {
   3642       static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
   3643                                            ARM::VST3LNd16Pseudo,
   3644                                            ARM::VST3LNd32Pseudo };
   3645       static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
   3646                                            ARM::VST3LNq32Pseudo };
   3647       SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
   3648       return;
   3649     }
   3650 
   3651     case Intrinsic::arm_neon_vst4lane: {
   3652       static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
   3653                                            ARM::VST4LNd16Pseudo,
   3654                                            ARM::VST4LNd32Pseudo };
   3655       static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
   3656                                            ARM::VST4LNq32Pseudo };
   3657       SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
   3658       return;
   3659     }
   3660     }
   3661     break;
   3662   }
   3663 
   3664   case ISD::INTRINSIC_WO_CHAIN: {
   3665     unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
   3666     switch (IntNo) {
   3667     default:
   3668       break;
   3669 
   3670     case Intrinsic::arm_neon_vtbl2:
   3671       SelectVTBL(N, false, 2, ARM::VTBL2);
   3672       return;
   3673     case Intrinsic::arm_neon_vtbl3:
   3674       SelectVTBL(N, false, 3, ARM::VTBL3Pseudo);
   3675       return;
   3676     case Intrinsic::arm_neon_vtbl4:
   3677       SelectVTBL(N, false, 4, ARM::VTBL4Pseudo);
   3678       return;
   3679 
   3680     case Intrinsic::arm_neon_vtbx2:
   3681       SelectVTBL(N, true, 2, ARM::VTBX2);
   3682       return;
   3683     case Intrinsic::arm_neon_vtbx3:
   3684       SelectVTBL(N, true, 3, ARM::VTBX3Pseudo);
   3685       return;
   3686     case Intrinsic::arm_neon_vtbx4:
   3687       SelectVTBL(N, true, 4, ARM::VTBX4Pseudo);
   3688       return;
   3689     }
   3690     break;
   3691   }
   3692 
   3693   case ARMISD::VTBL1: {
   3694     SDLoc dl(N);
   3695     EVT VT = N->getValueType(0);
   3696     SDValue Ops[] = {N->getOperand(0), N->getOperand(1),
   3697                      getAL(CurDAG, dl),                 // Predicate
   3698                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
   3699     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops));
   3700     return;
   3701   }
   3702   case ARMISD::VTBL2: {
   3703     SDLoc dl(N);
   3704     EVT VT = N->getValueType(0);
   3705 
   3706     // Form a REG_SEQUENCE to force register allocation.
   3707     SDValue V0 = N->getOperand(0);
   3708     SDValue V1 = N->getOperand(1);
   3709     SDValue RegSeq = SDValue(createDRegPairNode(MVT::v16i8, V0, V1), 0);
   3710 
   3711     SDValue Ops[] = {RegSeq, N->getOperand(2), getAL(CurDAG, dl), // Predicate
   3712                      CurDAG->getRegister(0, MVT::i32)}; // Predicate Register
   3713     ReplaceNode(N, CurDAG->getMachineNode(ARM::VTBL2, dl, VT, Ops));
   3714     return;
   3715   }
   3716 
   3717   case ISD::CONCAT_VECTORS:
   3718     SelectConcatVector(N);
   3719     return;
   3720 
   3721   case ISD::ATOMIC_CMP_SWAP:
   3722     SelectCMP_SWAP(N);
   3723     return;
   3724   }
   3725 
   3726   SelectCode(N);
   3727 }
   3728 
   3729 // Inspect a register string of the form
   3730 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
   3731 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
   3732 // and obtain the integer operands from them, adding these operands to the
   3733 // provided vector.
   3734 static void getIntOperandsFromRegisterString(StringRef RegString,
   3735                                              SelectionDAG *CurDAG,
   3736                                              const SDLoc &DL,
   3737                                              std::vector<SDValue> &Ops) {
   3738   SmallVector<StringRef, 5> Fields;
   3739   RegString.split(Fields, ':');
   3740 
   3741   if (Fields.size() > 1) {
   3742     bool AllIntFields = true;
   3743 
   3744     for (StringRef Field : Fields) {
   3745       // Need to trim out leading 'cp' characters and get the integer field.
   3746       unsigned IntField;
   3747       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
   3748       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
   3749     }
   3750 
   3751     assert(AllIntFields &&
   3752             "Unexpected non-integer value in special register string.");
   3753   }
   3754 }
   3755 
   3756 // Maps a Banked Register string to its mask value. The mask value returned is
   3757 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
   3758 // mask operand, which expresses which register is to be used, e.g. r8, and in
   3759 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
   3760 // was invalid.
   3761 static inline int getBankedRegisterMask(StringRef RegString) {
   3762   return StringSwitch<int>(RegString.lower())
   3763           .Case("r8_usr", 0x00)
   3764           .Case("r9_usr", 0x01)
   3765           .Case("r10_usr", 0x02)
   3766           .Case("r11_usr", 0x03)
   3767           .Case("r12_usr", 0x04)
   3768           .Case("sp_usr", 0x05)
   3769           .Case("lr_usr", 0x06)
   3770           .Case("r8_fiq", 0x08)
   3771           .Case("r9_fiq", 0x09)
   3772           .Case("r10_fiq", 0x0a)
   3773           .Case("r11_fiq", 0x0b)
   3774           .Case("r12_fiq", 0x0c)
   3775           .Case("sp_fiq", 0x0d)
   3776           .Case("lr_fiq", 0x0e)
   3777           .Case("lr_irq", 0x10)
   3778           .Case("sp_irq", 0x11)
   3779           .Case("lr_svc", 0x12)
   3780           .Case("sp_svc", 0x13)
   3781           .Case("lr_abt", 0x14)
   3782           .Case("sp_abt", 0x15)
   3783           .Case("lr_und", 0x16)
   3784           .Case("sp_und", 0x17)
   3785           .Case("lr_mon", 0x1c)
   3786           .Case("sp_mon", 0x1d)
   3787           .Case("elr_hyp", 0x1e)
   3788           .Case("sp_hyp", 0x1f)
   3789           .Case("spsr_fiq", 0x2e)
   3790           .Case("spsr_irq", 0x30)
   3791           .Case("spsr_svc", 0x32)
   3792           .Case("spsr_abt", 0x34)
   3793           .Case("spsr_und", 0x36)
   3794           .Case("spsr_mon", 0x3c)
   3795           .Case("spsr_hyp", 0x3e)
   3796           .Default(-1);
   3797 }
   3798 
   3799 // Maps a MClass special register string to its value for use in the
   3800 // t2MRS_M / t2MSR_M instruction nodes as the SYSm value operand.
   3801 // Returns -1 to signify that the string was invalid.
   3802 static inline int getMClassRegisterSYSmValueMask(StringRef RegString) {
   3803   return StringSwitch<int>(RegString.lower())
   3804           .Case("apsr", 0x0)
   3805           .Case("iapsr", 0x1)
   3806           .Case("eapsr", 0x2)
   3807           .Case("xpsr", 0x3)
   3808           .Case("ipsr", 0x5)
   3809           .Case("epsr", 0x6)
   3810           .Case("iepsr", 0x7)
   3811           .Case("msp", 0x8)
   3812           .Case("psp", 0x9)
   3813           .Case("primask", 0x10)
   3814           .Case("basepri", 0x11)
   3815           .Case("basepri_max", 0x12)
   3816           .Case("faultmask", 0x13)
   3817           .Case("control", 0x14)
   3818           .Case("msplim", 0x0a)
   3819           .Case("psplim", 0x0b)
   3820           .Case("sp", 0x18)
   3821           .Default(-1);
   3822 }
   3823 
   3824 // The flags here are common to those allowed for apsr in the A class cores and
   3825 // those allowed for the special registers in the M class cores. Returns a
   3826 // value representing which flags were present, -1 if invalid.
   3827 static inline int getMClassFlagsMask(StringRef Flags, bool hasDSP) {
   3828   if (Flags.empty())
   3829     return 0x2 | (int)hasDSP;
   3830 
   3831   return StringSwitch<int>(Flags)
   3832           .Case("g", 0x1)
   3833           .Case("nzcvq", 0x2)
   3834           .Case("nzcvqg", 0x3)
   3835           .Default(-1);
   3836 }
   3837 
   3838 static int getMClassRegisterMask(StringRef Reg, StringRef Flags, bool IsRead,
   3839                                  const ARMSubtarget *Subtarget) {
   3840   // Ensure that the register (without flags) was a valid M Class special
   3841   // register.
   3842   int SYSmvalue = getMClassRegisterSYSmValueMask(Reg);
   3843   if (SYSmvalue == -1)
   3844     return -1;
   3845 
   3846   // basepri, basepri_max and faultmask are only valid for V7m.
   3847   if (!Subtarget->hasV7Ops() && SYSmvalue >= 0x11 && SYSmvalue <= 0x13)
   3848     return -1;
   3849 
   3850   if (Subtarget->has8MSecExt() && Flags.lower() == "ns") {
   3851     Flags = "";
   3852     SYSmvalue |= 0x80;
   3853   }
   3854 
   3855   if (!Subtarget->has8MSecExt() &&
   3856       (SYSmvalue == 0xa || SYSmvalue == 0xb || SYSmvalue > 0x14))
   3857     return -1;
   3858 
   3859   if (!Subtarget->hasV8MMainlineOps() &&
   3860       (SYSmvalue == 0x8a || SYSmvalue == 0x8b || SYSmvalue == 0x91 ||
   3861        SYSmvalue == 0x93))
   3862     return -1;
   3863 
   3864   // If it was a read then we won't be expecting flags and so at this point
   3865   // we can return the mask.
   3866   if (IsRead) {
   3867     if (Flags.empty())
   3868       return SYSmvalue;
   3869     else
   3870       return -1;
   3871   }
   3872 
   3873   // We know we are now handling a write so need to get the mask for the flags.
   3874   int Mask = getMClassFlagsMask(Flags, Subtarget->hasDSP());
   3875 
   3876   // Only apsr, iapsr, eapsr, xpsr can have flags. The other register values
   3877   // shouldn't have flags present.
   3878   if ((SYSmvalue < 0x4 && Mask == -1) || (SYSmvalue > 0x4 && !Flags.empty()))
   3879     return -1;
   3880 
   3881   // The _g and _nzcvqg versions are only valid if the DSP extension is
   3882   // available.
   3883   if (!Subtarget->hasDSP() && (Mask & 0x1))
   3884     return -1;
   3885 
   3886   // The register was valid so need to put the mask in the correct place
   3887   // (the flags need to be in bits 11-10) and combine with the SYSmvalue to
   3888   // construct the operand for the instruction node.
   3889   if (SYSmvalue < 0x4)
   3890     return SYSmvalue | Mask << 10;
   3891 
   3892   return SYSmvalue;
   3893 }
   3894 
   3895 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
   3896   // The mask operand contains the special register (R Bit) in bit 4, whether
   3897   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
   3898   // bits 3-0 contains the fields to be accessed in the special register, set by
   3899   // the flags provided with the register.
   3900   int Mask = 0;
   3901   if (Reg == "apsr") {
   3902     // The flags permitted for apsr are the same flags that are allowed in
   3903     // M class registers. We get the flag value and then shift the flags into
   3904     // the correct place to combine with the mask.
   3905     Mask = getMClassFlagsMask(Flags, true);
   3906     if (Mask == -1)
   3907       return -1;
   3908     return Mask << 2;
   3909   }
   3910 
   3911   if (Reg != "cpsr" && Reg != "spsr") {
   3912     return -1;
   3913   }
   3914 
   3915   // This is the same as if the flags were "fc"
   3916   if (Flags.empty() || Flags == "all")
   3917     return Mask | 0x9;
   3918 
   3919   // Inspect the supplied flags string and set the bits in the mask for
   3920   // the relevant and valid flags allowed for cpsr and spsr.
   3921   for (char Flag : Flags) {
   3922     int FlagVal;
   3923     switch (Flag) {
   3924       case 'c':
   3925         FlagVal = 0x1;
   3926         break;
   3927       case 'x':
   3928         FlagVal = 0x2;
   3929         break;
   3930       case 's':
   3931         FlagVal = 0x4;
   3932         break;
   3933       case 'f':
   3934         FlagVal = 0x8;
   3935         break;
   3936       default:
   3937         FlagVal = 0;
   3938     }
   3939 
   3940     // This avoids allowing strings where the same flag bit appears twice.
   3941     if (!FlagVal || (Mask & FlagVal))
   3942       return -1;
   3943     Mask |= FlagVal;
   3944   }
   3945 
   3946   // If the register is spsr then we need to set the R bit.
   3947   if (Reg == "spsr")
   3948     Mask |= 0x10;
   3949 
   3950   return Mask;
   3951 }
   3952 
   3953 // Lower the read_register intrinsic to ARM specific DAG nodes
   3954 // using the supplied metadata string to select the instruction node to use
   3955 // and the registers/masks to construct as operands for the node.
   3956 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
   3957   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   3958   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   3959   bool IsThumb2 = Subtarget->isThumb2();
   3960   SDLoc DL(N);
   3961 
   3962   std::vector<SDValue> Ops;
   3963   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
   3964 
   3965   if (!Ops.empty()) {
   3966     // If the special register string was constructed of fields (as defined
   3967     // in the ACLE) then need to lower to MRC node (32 bit) or
   3968     // MRRC node(64 bit), we can make the distinction based on the number of
   3969     // operands we have.
   3970     unsigned Opcode;
   3971     SmallVector<EVT, 3> ResTypes;
   3972     if (Ops.size() == 5){
   3973       Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
   3974       ResTypes.append({ MVT::i32, MVT::Other });
   3975     } else {
   3976       assert(Ops.size() == 3 &&
   3977               "Invalid number of fields in special register string.");
   3978       Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
   3979       ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
   3980     }
   3981 
   3982     Ops.push_back(getAL(CurDAG, DL));
   3983     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   3984     Ops.push_back(N->getOperand(0));
   3985     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
   3986     return true;
   3987   }
   3988 
   3989   std::string SpecialReg = RegString->getString().lower();
   3990 
   3991   int BankedReg = getBankedRegisterMask(SpecialReg);
   3992   if (BankedReg != -1) {
   3993     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
   3994             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   3995             N->getOperand(0) };
   3996     ReplaceNode(
   3997         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
   3998                                   DL, MVT::i32, MVT::Other, Ops));
   3999     return true;
   4000   }
   4001 
   4002   // The VFP registers are read by creating SelectionDAG nodes with opcodes
   4003   // corresponding to the register that is being read from. So we switch on the
   4004   // string to find which opcode we need to use.
   4005   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
   4006                     .Case("fpscr", ARM::VMRS)
   4007                     .Case("fpexc", ARM::VMRS_FPEXC)
   4008                     .Case("fpsid", ARM::VMRS_FPSID)
   4009                     .Case("mvfr0", ARM::VMRS_MVFR0)
   4010                     .Case("mvfr1", ARM::VMRS_MVFR1)
   4011                     .Case("mvfr2", ARM::VMRS_MVFR2)
   4012                     .Case("fpinst", ARM::VMRS_FPINST)
   4013                     .Case("fpinst2", ARM::VMRS_FPINST2)
   4014                     .Default(0);
   4015 
   4016   // If an opcode was found then we can lower the read to a VFP instruction.
   4017   if (Opcode) {
   4018     if (!Subtarget->hasVFP2())
   4019       return false;
   4020     if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8())
   4021       return false;
   4022 
   4023     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   4024             N->getOperand(0) };
   4025     ReplaceNode(N,
   4026                 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
   4027     return true;
   4028   }
   4029 
   4030   // If the target is M Class then need to validate that the register string
   4031   // is an acceptable value, so check that a mask can be constructed from the
   4032   // string.
   4033   if (Subtarget->isMClass()) {
   4034     StringRef Flags = "", Reg = SpecialReg;
   4035     if (Reg.endswith("_ns")) {
   4036       Flags = "ns";
   4037       Reg = Reg.drop_back(3);
   4038     }
   4039 
   4040     int SYSmValue = getMClassRegisterMask(Reg, Flags, true, Subtarget);
   4041     if (SYSmValue == -1)
   4042       return false;
   4043 
   4044     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
   4045                       getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   4046                       N->getOperand(0) };
   4047     ReplaceNode(
   4048         N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
   4049     return true;
   4050   }
   4051 
   4052   // Here we know the target is not M Class so we need to check if it is one
   4053   // of the remaining possible values which are apsr, cpsr or spsr.
   4054   if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
   4055     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   4056             N->getOperand(0) };
   4057     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
   4058                                           DL, MVT::i32, MVT::Other, Ops));
   4059     return true;
   4060   }
   4061 
   4062   if (SpecialReg == "spsr") {
   4063     Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   4064             N->getOperand(0) };
   4065     ReplaceNode(
   4066         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
   4067                                   MVT::i32, MVT::Other, Ops));
   4068     return true;
   4069   }
   4070 
   4071   return false;
   4072 }
   4073 
   4074 // Lower the write_register intrinsic to ARM specific DAG nodes
   4075 // using the supplied metadata string to select the instruction node to use
   4076 // and the registers/masks to use in the nodes
   4077 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
   4078   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   4079   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   4080   bool IsThumb2 = Subtarget->isThumb2();
   4081   SDLoc DL(N);
   4082 
   4083   std::vector<SDValue> Ops;
   4084   getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);
   4085 
   4086   if (!Ops.empty()) {
   4087     // If the special register string was constructed of fields (as defined
   4088     // in the ACLE) then need to lower to MCR node (32 bit) or
   4089     // MCRR node(64 bit), we can make the distinction based on the number of
   4090     // operands we have.
   4091     unsigned Opcode;
   4092     if (Ops.size() == 5) {
   4093       Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
   4094       Ops.insert(Ops.begin()+2, N->getOperand(2));
   4095     } else {
   4096       assert(Ops.size() == 3 &&
   4097               "Invalid number of fields in special register string.");
   4098       Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
   4099       SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
   4100       Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
   4101     }
   4102 
   4103     Ops.push_back(getAL(CurDAG, DL));
   4104     Ops.push_back(CurDAG->getRegister(0, MVT::i32));
   4105     Ops.push_back(N->getOperand(0));
   4106 
   4107     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
   4108     return true;
   4109   }
   4110 
   4111   std::string SpecialReg = RegString->getString().lower();
   4112   int BankedReg = getBankedRegisterMask(SpecialReg);
   4113   if (BankedReg != -1) {
   4114     Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
   4115             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   4116             N->getOperand(0) };
   4117     ReplaceNode(
   4118         N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
   4119                                   DL, MVT::Other, Ops));
   4120     return true;
   4121   }
   4122 
   4123   // The VFP registers are written to by creating SelectionDAG nodes with
   4124   // opcodes corresponding to the register that is being written. So we switch
   4125   // on the string to find which opcode we need to use.
   4126   unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
   4127                     .Case("fpscr", ARM::VMSR)
   4128                     .Case("fpexc", ARM::VMSR_FPEXC)
   4129                     .Case("fpsid", ARM::VMSR_FPSID)
   4130                     .Case("fpinst", ARM::VMSR_FPINST)
   4131                     .Case("fpinst2", ARM::VMSR_FPINST2)
   4132                     .Default(0);
   4133 
   4134   if (Opcode) {
   4135     if (!Subtarget->hasVFP2())
   4136       return false;
   4137     Ops = { N->getOperand(2), getAL(CurDAG, DL),
   4138             CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
   4139     ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
   4140     return true;
   4141   }
   4142 
   4143   std::pair<StringRef, StringRef> Fields;
   4144   Fields = StringRef(SpecialReg).rsplit('_');
   4145   std::string Reg = Fields.first.str();
   4146   StringRef Flags = Fields.second;
   4147 
   4148   // If the target was M Class then need to validate the special register value
   4149   // and retrieve the mask for use in the instruction node.
   4150   if (Subtarget->isMClass()) {
   4151     // basepri_max gets split so need to correct Reg and Flags.
   4152     if (SpecialReg == "basepri_max") {
   4153       Reg = SpecialReg;
   4154       Flags = "";
   4155     }
   4156     int SYSmValue = getMClassRegisterMask(Reg, Flags, false, Subtarget);
   4157     if (SYSmValue == -1)
   4158       return false;
   4159 
   4160     SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
   4161                       N->getOperand(2), getAL(CurDAG, DL),
   4162                       CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
   4163     ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
   4164     return true;
   4165   }
   4166 
   4167   // We then check to see if a valid mask can be constructed for one of the
   4168   // register string values permitted for the A and R class cores. These values
   4169   // are apsr, spsr and cpsr; these are also valid on older cores.
   4170   int Mask = getARClassRegisterMask(Reg, Flags);
   4171   if (Mask != -1) {
   4172     Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
   4173             getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
   4174             N->getOperand(0) };
   4175     ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
   4176                                           DL, MVT::Other, Ops));
   4177     return true;
   4178   }
   4179 
   4180   return false;
   4181 }
   4182 
   4183 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
   4184   std::vector<SDValue> AsmNodeOperands;
   4185   unsigned Flag, Kind;
   4186   bool Changed = false;
   4187   unsigned NumOps = N->getNumOperands();
   4188 
   4189   // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
   4190   // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
   4191   // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
   4192   // respectively. Since there is no constraint to explicitly specify a
   4193   // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
   4194   // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
   4195   // them into a GPRPair.
   4196 
   4197   SDLoc dl(N);
   4198   SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1)
   4199                                    : SDValue(nullptr,0);
   4200 
   4201   SmallVector<bool, 8> OpChanged;
   4202   // Glue node will be appended late.
   4203   for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
   4204     SDValue op = N->getOperand(i);
   4205     AsmNodeOperands.push_back(op);
   4206 
   4207     if (i < InlineAsm::Op_FirstOperand)
   4208       continue;
   4209 
   4210     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) {
   4211       Flag = C->getZExtValue();
   4212       Kind = InlineAsm::getKind(Flag);
   4213     }
   4214     else
   4215       continue;
   4216 
   4217     // Immediate operands to inline asm in the SelectionDAG are modeled with
   4218     // two operands. The first is a constant of value InlineAsm::Kind_Imm, and
   4219     // the second is a constant with the value of the immediate. If we get here
   4220     // and we have a Kind_Imm, skip the next operand, and continue.
   4221     if (Kind == InlineAsm::Kind_Imm) {
   4222       SDValue op = N->getOperand(++i);
   4223       AsmNodeOperands.push_back(op);
   4224       continue;
   4225     }
   4226 
   4227     unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
   4228     if (NumRegs)
   4229       OpChanged.push_back(false);
   4230 
   4231     unsigned DefIdx = 0;
   4232     bool IsTiedToChangedOp = false;
   4233     // If it's a use that is tied with a previous def, it has no
   4234     // reg class constraint.
   4235     if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx))
   4236       IsTiedToChangedOp = OpChanged[DefIdx];
   4237 
   4238     if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef
   4239         && Kind != InlineAsm::Kind_RegDefEarlyClobber)
   4240       continue;
   4241 
   4242     unsigned RC;
   4243     bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC);
   4244     if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
   4245         || NumRegs != 2)
   4246       continue;
   4247 
   4248     assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
   4249     SDValue V0 = N->getOperand(i+1);
   4250     SDValue V1 = N->getOperand(i+2);
   4251     unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg();
   4252     unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg();
   4253     SDValue PairedReg;
   4254     MachineRegisterInfo &MRI = MF->getRegInfo();
   4255 
   4256     if (Kind == InlineAsm::Kind_RegDef ||
   4257         Kind == InlineAsm::Kind_RegDefEarlyClobber) {
   4258       // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
   4259       // the original GPRs.
   4260 
   4261       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   4262       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
   4263       SDValue Chain = SDValue(N,0);
   4264 
   4265       SDNode *GU = N->getGluedUser();
   4266       SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
   4267                                                Chain.getValue(1));
   4268 
   4269       // Extract values from a GPRPair reg and copy to the original GPR reg.
   4270       SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
   4271                                                     RegCopy);
   4272       SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
   4273                                                     RegCopy);
   4274       SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
   4275                                         RegCopy.getValue(1));
   4276       SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));
   4277 
   4278       // Update the original glue user.
   4279       std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
   4280       Ops.push_back(T1.getValue(1));
   4281       CurDAG->UpdateNodeOperands(GU, Ops);
   4282     }
   4283     else {
   4284       // For Kind  == InlineAsm::Kind_RegUse, we first copy two GPRs into a
   4285       // GPRPair and then pass the GPRPair to the inline asm.
   4286       SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];
   4287 
   4288       // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
   4289       SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
   4290                                           Chain.getValue(1));
   4291       SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
   4292                                           T0.getValue(1));
   4293       SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);
   4294 
   4295       // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
   4296       // i32 VRs of inline asm with it.
   4297       unsigned GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
   4298       PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
   4299       Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));
   4300 
   4301       AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
   4302       Glue = Chain.getValue(1);
   4303     }
   4304 
   4305     Changed = true;
   4306 
   4307     if(PairedReg.getNode()) {
   4308       OpChanged[OpChanged.size() -1 ] = true;
   4309       Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/);
   4310       if (IsTiedToChangedOp)
   4311         Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx);
   4312       else
   4313         Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID);
   4314       // Replace the current flag.
   4315       AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
   4316           Flag, dl, MVT::i32);
   4317       // Add the new register node and skip the original two GPRs.
   4318       AsmNodeOperands.push_back(PairedReg);
   4319       // Skip the next two GPRs.
   4320       i += 2;
   4321     }
   4322   }
   4323 
   4324   if (Glue.getNode())
   4325     AsmNodeOperands.push_back(Glue);
   4326   if (!Changed)
   4327     return false;
   4328 
   4329   SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N),
   4330       CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
   4331   New->setNodeId(-1);
   4332   ReplaceNode(N, New.getNode());
   4333   return true;
   4334 }
   4335 
   4336 
   4337 bool ARMDAGToDAGISel::
   4338 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
   4339                              std::vector<SDValue> &OutOps) {
   4340   switch(ConstraintID) {
   4341   default:
   4342     llvm_unreachable("Unexpected asm memory constraint");
   4343   case InlineAsm::Constraint_i:
   4344     // FIXME: It seems strange that 'i' is needed here since it's supposed to
   4345     //        be an immediate and not a memory constraint.
   4346     // Fallthrough.
   4347   case InlineAsm::Constraint_m:
   4348   case InlineAsm::Constraint_o:
   4349   case InlineAsm::Constraint_Q:
   4350   case InlineAsm::Constraint_Um:
   4351   case InlineAsm::Constraint_Un:
   4352   case InlineAsm::Constraint_Uq:
   4353   case InlineAsm::Constraint_Us:
   4354   case InlineAsm::Constraint_Ut:
   4355   case InlineAsm::Constraint_Uv:
   4356   case InlineAsm::Constraint_Uy:
   4357     // Require the address to be in a register.  That is safe for all ARM
   4358     // variants and it is hard to do anything much smarter without knowing
   4359     // how the operand is used.
   4360     OutOps.push_back(Op);
   4361     return false;
   4362   }
   4363   return true;
   4364 }
   4365 
   4366 /// createARMISelDag - This pass converts a legalized DAG into a
   4367 /// ARM-specific DAG, ready for instruction scheduling.
   4368 ///
   4369 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
   4370                                      CodeGenOpt::Level OptLevel) {
   4371   return new ARMDAGToDAGISel(TM, OptLevel);
   4372 }
   4373