//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized DAG to an X86 DAG.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <stdint.h>
using namespace llvm;

#define DEBUG_TYPE "x86-isel"

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");

//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// This corresponds to X86AddressMode, but uses SDValues instead of register
  /// numbers for the leaves of the matched tree.
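  /// The matched form mirrors the x86 memory operand
  /// Segment:[Base + Scale*Index + Disp], where Disp may also carry a
  /// symbolic piece (global, constant pool, jump table, block address, ...).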
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    MCSymbol *MCSym;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
        : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
          Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr),
          MCSym(nullptr), JT(-1), Align(0), SymbolFlags(X86II::MO_NO_FLAG) {}

    bool hasSymbolicDisplacement() const {
      return GV != nullptr || CP != nullptr || ES != nullptr ||
             MCSym != nullptr || JT != -1 || BlockAddr != nullptr;
    }

    bool hasBaseOrIndexReg() const {
      return BaseType == FrameIndexBase ||
             IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
    }

    /// Return true if this addressing mode is already RIP-relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode())
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base_FrameIndex " << Base_FrameIndex << '\n'
             << " Scale " << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode())
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " MCSym ";
      if (MCSym)
        dbgs() << MCSym;
      else
        dbgs() << "nul";
      dbgs() << " JT " << JT << " Align " << Align << '\n';
    }
#endif
  };
}

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86-specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel final : public SelectionDAGISel {
    /// Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// If true, selector should try to optimize for code size instead of
    /// performance.
    bool OptForSize;

    /// If true, selector should try to optimize for minimum code size.
    bool OptForMinSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
        : SelectionDAGISel(tm, OptLevel), OptForSize(false),
          OptForMinSize(false) {}

    const char *getPassName() const override {
      return "X86 DAG->DAG Instruction Selection";
    }

    bool runOnMachineFunction(MachineFunction &MF) override {
      // Reset the subtarget each time through.
      Subtarget = &MF.getSubtarget<X86Subtarget>();
      SelectionDAGISel::runOnMachineFunction(MF);
      return true;
    }

    void EmitFunctionEntryCode() override;

    bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override;

    void PreprocessISelDAG() override;

    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // True if the 64-bit immediate fits in a 32-bit sign-extended field.
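    // e.g. 0xFFFFFFFF80000000 (INT32_MIN sign-extended to 64 bits) fits,
    // while 0x0000000080000000 (+2^31) does not.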
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }

// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N) override;
    SDNode *selectGather(SDNode *N, unsigned Opc);
    SDNode *selectAtomicLoadArith(SDNode *Node, MVT NVT);

    bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
    bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool matchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool matchAddress(SDValue N, X86ISelAddressMode &AM);
    bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
    bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                          SDValue &Scale, SDValue &Index, SDValue &Disp,
                          SDValue &Segment);
    bool selectMOV64Imm32(SDValue N, SDValue &Imm);
    bool selectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool selectLEA64_32Addr(SDValue N, SDValue &Base,
                            SDValue &Scale, SDValue &Index, SDValue &Disp,
                            SDValue &Segment);
    bool selectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool selectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool tryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// Implement addressing mode selection for inline asm expressions.
    bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                      unsigned ConstraintID,
                                      std::vector<SDValue> &OutOps) override;

    void emitSpecialCodeForMain();

    inline void getAddressOperands(X86ISelAddressMode &AM, SDLoc DL,
                                   SDValue &Base, SDValue &Scale,
                                   SDValue &Index, SDValue &Disp,
                                   SDValue &Segment) {
      Base = (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
                 ? CurDAG->getTargetFrameIndex(
                       AM.Base_FrameIndex,
                       TLI->getPointerTy(CurDAG->getDataLayout()))
                 : AM.Base_Reg;
      Scale = getI8Imm(AM.Scale, DL);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP-relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES) {
        assert(!AM.Disp && "Non-zero displacement is ignored with ES.");
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      } else if (AM.MCSym) {
        assert(!AM.Disp && "Non-zero displacement is ignored with MCSym.");
        assert(AM.SymbolFlags == 0 && "MCSym with non-zero symbol flags?");
        Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32);
      } else if (AM.JT != -1) {
        assert(!AM.Disp && "Non-zero displacement is ignored with JT.");
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      } else if (AM.BlockAddr)
        Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp,
                                             AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32);

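      // Register 0 in the Segment slot is the MI-level convention for
      // "no segment override".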
      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    // Utility function to determine whether we should avoid selecting
    // immediate forms of instructions for better code size.
    // At a high level, we'd like to avoid such instructions when the same
    // constant is used multiple times within one basic block and could be
    // kept in a register instead.
    //
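    // For example, at -Os two 'addl $0x12345678, ...' in one basic block
    // spend four immediate bytes each; materializing the constant once in a
    // register and using the register forms is smaller overall.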
    bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const {
      uint32_t UseCount = 0;

      // Do not want to hoist if we're not optimizing for size.
      // TODO: We'd like to remove this restriction.
      // See the comment in X86InstrInfo.td for more info.
      if (!OptForSize)
        return false;

      // Walk all the users of the immediate.
      for (SDNode::use_iterator UI = N->use_begin(),
           UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) {

        SDNode *User = *UI;

        // This user is already selected. Count it as a legitimate use and
        // move on.
        if (User->isMachineOpcode()) {
          UseCount++;
          continue;
        }

        // We want to count stores of immediates as real uses.
        if (User->getOpcode() == ISD::STORE &&
            User->getOperand(1).getNode() == N) {
          UseCount++;
          continue;
        }

        // We don't currently match users that have > 2 operands (except
        // for stores, which are handled above). Those instructions won't
        // match in ISel for now and would be counted incorrectly.
        // This may change in the future as we add additional instruction
        // types.
        if (User->getNumOperands() != 2)
          continue;

        // Immediates that are used for offsets as part of stack
        // manipulation should be left alone. These are typically
        // used to indicate SP offsets for argument passing and
        // will get pulled into stores/pushes (implicitly).
        if (User->getOpcode() == X86ISD::ADD ||
            User->getOpcode() == ISD::ADD    ||
            User->getOpcode() == X86ISD::SUB ||
            User->getOpcode() == ISD::SUB) {

          // Find the other operand of the add/sub.
          SDValue OtherOp = User->getOperand(0);
          if (OtherOp.getNode() == N)
            OtherOp = User->getOperand(1);

          // Don't count if the other operand is SP.
          RegisterSDNode *RegNode;
          if (OtherOp->getOpcode() == ISD::CopyFromReg &&
              (RegNode = dyn_cast_or_null<RegisterSDNode>(
                 OtherOp->getOperand(1).getNode())))
            if ((RegNode->getReg() == X86::ESP) ||
                (RegNode->getReg() == X86::RSP))
              continue;
        }

        // ... otherwise, count this and move on.
        UseCount++;
      }

      // If the immediate has more than one legitimate use, recommend hoisting
      // it into a register.
      return (UseCount > 1);
    }

    /// Return a target constant with the specified value of type i8.
    inline SDValue getI8Imm(unsigned Imm, SDLoc DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i8);
    }

    /// Return a target constant with the specified value, of type i32.
    inline SDValue getI32Imm(unsigned Imm, SDLoc DL) {
      return CurDAG->getTargetConstant(Imm, DL, MVT::i32);
    }

    /// Return an SDNode that returns the value of the global base register.
    /// Output instructions required to initialize the global base register,
    /// if necessary.
    SDNode *getGlobalBaseReg();

    /// Return a reference to the TargetMachine, casted to the target-specific
    /// type.
    const X86TargetMachine &getTargetMachine() const {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// Return a reference to the TargetInstrInfo, casted to the target-specific
    /// type.
    const X86InstrInfo *getInstrInfo() const {
      return Subtarget->getInstrInfo();
    }

    /// \brief Address-mode matching performs shift-of-and to and-of-shift
    /// reassociation in order to expose more scaled addressing
    /// opportunities.
    bool ComplexPatternFuncMutatesDAG() const override {
      return true;
    }
  };
}


bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is an 8-bit immediate we should fold the
      // immediate instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In the case where the increment is 1,
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl    %gs:0, %eax
      // leal    i@NTPOFF(%eax), %eax
      // instead of
      // movl    $i@NTPOFF, %eax
      // addl    %gs:0, %eax
      // If the block also has an access to a second TLS address, this will
      // save a load.
      // FIXME: This is probably also true for non-TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// Replace the original chain operand of the call with the load's chain
/// operand, and move the load below the call's chain operand.
static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
    Ops.clear();
    Ops.push_back(NewChain);
  }
  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));

  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  Ops.append(Call->op_begin() + 1, Call->op_end());
  CurDAG->UpdateNodeOperands(Call.getNode(), Ops);
}

/// Return true if the call address is a load that can be moved below
/// CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  // The transformation is somewhat dangerous if the call's chain was glued to
  // the call. After moveBelowOrigChain the load is moved between the call and
  // the chain; this can create a cycle if the load is not folded. So it is
  // *really* important that we are sure the load will be folded.
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  // Since we are not checking for AA here, conservatively abort if the chain
  // writes to memory. It's not safe to move the callee (a load) across a store.
  if (isa<MemSDNode>(Chain.getNode()) &&
      cast<MemSDNode>(Chain.getNode())->writeMem())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptFor[Min]Size are used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->optForSize();
  OptForMinSize = MF->getFunction()->optForMinSize();
  assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize");

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        // Only do this when the target doesn't favor register-indirect
        // calls.
        ((N->getOpcode() == X86ISD::CALL && !Subtarget->callRegIndirect()) ||
         (N->getOpcode() == X86ISD::TC_RETURN &&
          // Only do this if the load can be folded into TC_RETURN.
          (Subtarget->is64Bit() ||
           getTargetMachine().getRelocationModel() != Reloc::PIC_)))) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      ///
      ///     [Load chain]
      ///         ^
      ///         |
      ///       [Load]
      ///       ^    ^
      ///       |    |
      ///      /      \--
      ///     /          |
      ///[CALLSEQ_START] |
      ///     ^          |
      ///     |          |
      /// [LOAD/C2Reg]   |
      ///     |          |
      ///      \        /
      ///       \      /
      ///       [CALL]
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to a store
    // and load through a stack slot.  This is a gross hack.  We would like to
    // simply mark these as being illegal, but when we do that, legalize
    // produces these when it expands calls, then expands these in the same
    // legalize pass.  We would like dag combine to be able to hack on these
    // between the call expansion and the node legalization.  As such this
    // pass basically does "really late" legalization of these inline with the
    // X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    MVT SrcVT = N->getOperand(0).getSimpleValueType();
    MVT DstVT = N->getSimpleValueType(0);

    // If any of the sources are vectors, no fp stack involved.
    if (SrcVT.isVector() || DstVT.isVector())
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    const X86TargetLowering *X86Lowering =
        static_cast<const X86TargetLowering *>(TLI);
    bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    MVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

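    // e.g. for an f32 (SSE) -> f80 (FP stack) extension, MemVT is f32: we
    // emit an f32 store below and an f32 -> f80 extload from the same slot.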
    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    SDLoc dl(N);

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havoc on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}


/// Emit any code that needs to be executed only in the main function.
void X86DAGToDAGISel::emitSpecialCodeForMain() {
  if (Subtarget->isTargetCygMing()) {
    TargetLowering::ArgListTy Args;
    auto &DL = CurDAG->getDataLayout();

    TargetLowering::CallLoweringInfo CLI(*CurDAG);
    CLI.setChain(CurDAG->getRoot())
        .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()),
                   CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)),
                   std::move(Args), 0);
    const TargetLowering &TLI = CurDAG->getTargetLoweringInfo();
    std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
    CurDAG->setRoot(Result.second);
  }
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      emitSpecialCodeForMain();
}

static bool isDispSafeForFrameIndex(int64_t Val) {
  // On 64-bit platforms, we can run into an issue where a frame index
  // includes a displacement that, when added to the explicit displacement,
  // will overflow the displacement field. Assuming that the frame index
  // displacement fits into a 31-bit integer (which is only slightly more
  // aggressive than the current fundamental assumption that it fits into
  // a 32-bit integer), a 31-bit disp should always be safe.
  return isInt<31>(Val);
}

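// Try to fold a constant Offset into the addressing mode AM, e.g. folding the
// +12 of 'G + 12' into the displacement. Returns true (failure) if the
// combined displacement would break the rules of the current code model or of
// a frame-index base.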
bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset,
                                            X86ISelAddressMode &AM) {
  // Cannot combine ExternalSymbol displacements with integer offsets.
  if (Offset != 0 && (AM.ES || AM.MCSym))
    return true;
  int64_t Val = AM.Disp + Offset;
  CodeModel::Model M = TM.getCodeModel();
  if (Subtarget->is64Bit()) {
    if (!X86::isOffsetSuitableForCodeModel(Val, M,
                                           AM.hasSymbolicDisplacement()))
      return true;
    // In addition to the checks required for a register base, check that
    // we do not try to use an unsafe Disp with a frame index.
    if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
        !isDispSafeForFrameIndex(Val))
      return true;
  }
  AM.Disp = Val;
  return false;
}

bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
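  // Address space 256 marks GS-relative pointers and 257 FS-relative ones,
  // so a load of constant address 0 in one of those spaces reduces to a bare
  // segment-register access.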
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr &&
        Subtarget->isTargetLinux())
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      }

  return true;
}

/// Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes into an addressing
/// mode. These wrap things that will resolve down into a symbol reference.
/// If no match is possible, this returns true, otherwise it returns false.
bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() && N.getOpcode() == X86ISD::WrapperRIP &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel)) {
    // Base and index reg must be 0 in order to use %rip as base.
    if (AM.hasBaseOrIndexReg())
      return true;
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.GV = G->getGlobal();
      AM.SymbolFlags = G->getTargetFlags();
      if (foldOffsetIntoAddress(G->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.SymbolFlags = CP->getTargetFlags();
      if (foldOffsetIntoAddress(CP->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      X86ISelAddressMode Backup = AM;
      AM.BlockAddr = BA->getBlockAddress();
      AM.SymbolFlags = BA->getTargetFlags();
      if (foldOffsetIntoAddress(BA->getOffset(), AM)) {
        AM = Backup;
        return true;
      }
    } else
      llvm_unreachable("Unhandled symbol reference node.");

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -mcmodel=small mode.  In 64-bit
  // mode, this only applies to a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      M == CodeModel::Small || M == CodeModel::Kernel) {
    assert(N.getOpcode() != X86ISD::WrapperRIP &&
           "RIP-relative addressing already handled");
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) {
      AM.MCSym = S->getMCSymbol();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) {
      AM.BlockAddr = BA->getBlockAddress();
      AM.Disp += BA->getOffset();
      AM.SymbolFlags = BA->getTargetFlags();
    } else
      llvm_unreachable("Unhandled symbol reference node.");
    return false;
  }

  return true;
}

/// Add the specified node to the specified addressing mode, returning true if
/// it cannot be done. This just pattern matches for the addressing mode.
bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (matchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
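  // (With no base register, the SIB "no base" encoding forces a 32-bit
  // displacement, so lea (,%reg,2) is typically four bytes longer than the
  // equivalent lea (%reg,%reg).)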
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == nullptr &&
      AM.IndexReg.getNode() == nullptr &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

bool X86DAGToDAGISel::matchAdd(SDValue N, X86ISelAddressMode &AM,
                               unsigned Depth) {
  // Add an artificial use to this node so that we can keep track of
  // it if it gets CSE'd with a different node.
  HandleSDNode Handle(N);

  X86ISelAddressMode Backup = AM;
  if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1))
    return false;
  AM = Backup;

  // Try again after commuting the operands.
  if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1) &&
      !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth+1))
    return false;
  AM = Backup;

  // If we couldn't fold both operands into the address at the same time,
  // see if we can just put each operand into a register and fold at least
  // the add.
  if (AM.BaseType == X86ISelAddressMode::RegBase &&
      !AM.Base_Reg.getNode() &&
      !AM.IndexReg.getNode()) {
    N = Handle.getValue();
    AM.Base_Reg = N.getOperand(0);
    AM.IndexReg = N.getOperand(1);
    AM.Scale = 1;
    return false;
  }
  N = Handle.getValue();
  return true;
}

// Insert a node into the DAG at least before the Pos node's position. This
// will reposition the node as needed, and will assign it a node ID that is <=
// the Pos node's ID. Note that this does *not* preserve the uniqueness of node
// IDs! The selection DAG must no longer depend on their uniqueness when this
// is used.
static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) {
  if (N.getNode()->getNodeId() == -1 ||
      N.getNode()->getNodeId() > Pos.getNode()->getNodeId()) {
    DAG.RepositionNode(Pos.getNode()->getIterator(), N.getNode());
    N.getNode()->setNodeId(Pos.getNode()->getNodeId());
  }
}

// Transform "(X >> (8-C1)) & (0xff << C1)" to "((X >> 8) & 0xff) << C1" if
// safe. This allows us to convert the shift-and-mask into an h-register
// extract and a scaled index. Returns false if the simplification was
// performed, true otherwise.
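// e.g. with C1 == 3: (x >> 5) & 0x7f8 becomes ((x >> 8) & 0xff) << 3, which
// is matched as IndexReg = (x >> 8) & 0xff (an h-register extract) with
// Scale = 8.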
static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N,
                                      uint64_t Mask,
                                      SDValue Shift, SDValue X,
                                      X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)) ||
      !Shift.hasOneUse())
    return true;

  int ScaleLog = 8 - Shift.getConstantOperandVal(1);
  if (ScaleLog <= 0 || ScaleLog >= 4 ||
      Mask != (0xffu << ScaleLog))
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue Eight = DAG.getConstant(8, DL, MVT::i8);
  SDValue NewMask = DAG.getConstant(0xff, DL, VT);
  SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight);
  SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask);
  SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, Eight);
  insertDAGNode(DAG, N, Srl);
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, And);
  insertDAGNode(DAG, N, ShlCount);
  insertDAGNode(DAG, N, Shl);
  DAG.ReplaceAllUsesWith(N, Shl);
  AM.IndexReg = And;
  AM.Scale = (1 << ScaleLog);
  return false;
}

// Transforms "(X << C1) & C2" to "(X & (C2>>C1)) << C1" if safe and if this
// allows us to fold the shift into this addressing mode. Returns false if the
// transform succeeded.
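// e.g. (x << 2) & 0x3fc becomes (x & 0xff) << 2, which is then matched as
// IndexReg = (x & 0xff) with Scale = 4.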
static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N,
                                        uint64_t Mask,
                                        SDValue Shift, SDValue X,
                                        X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SHL ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  // Not likely to be profitable if either the AND or SHIFT node has more
  // than one use (unless all uses are for address computation). Besides,
  // isel mechanism requires their node ids to be reused.
  if (!N.hasOneUse() || !Shift.hasOneUse())
    return true;

  // Verify that the shift amount is something we can fold.
  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3)
    return true;

  MVT VT = N.getSimpleValueType();
  SDLoc DL(N);
  SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT);
  SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask);
  SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1));

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewMask);
  insertDAGNode(DAG, N, NewAnd);
  insertDAGNode(DAG, N, NewShift);
  DAG.ReplaceAllUsesWith(N, NewShift);

  AM.Scale = 1 << ShiftAmt;
  AM.IndexReg = NewAnd;
  return false;
}

// Implement some heroics to detect shifts of masked values where the mask can
// be replaced by extending the shift and undoing that in the addressing mode
// scale. Patterns such as (shl (srl x, c1), c2) are canonicalized into (and
// (srl x, SHIFT), MASK) by DAGCombines that don't know the shl can be done in
// the addressing mode. This results in code such as:
//
//   int f(short *y, int *lookup_table) {
//     ...
//     return *y + lookup_table[*y >> 11];
//   }
//
// Turning into:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $11, %ecx
//   addl (%rsi,%rcx,4), %eax
//
// Instead of:
//   movzwl (%rdi), %eax
//   movl %eax, %ecx
//   shrl $9, %ecx
//   andl $124, %ecx
//   addl (%rsi,%rcx), %eax
//
// Note that this function assumes the mask is provided as a mask *after* the
// value is shifted. The input chain may or may not match that, but computing
// such a mask is trivial.
static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N,
                                    uint64_t Mask,
                                    SDValue Shift, SDValue X,
                                    X86ISelAddressMode &AM) {
  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() ||
      !isa<ConstantSDNode>(Shift.getOperand(1)))
    return true;

  unsigned ShiftAmt = Shift.getConstantOperandVal(1);
  unsigned MaskLZ = countLeadingZeros(Mask);
  unsigned MaskTZ = countTrailingZeros(Mask);

  // The amount of shift we're trying to fit into the addressing mode is taken
  // from the trailing zeros of the mask.
  unsigned AMShiftAmt = MaskTZ;

  // There is nothing we can do here unless the mask is removing some bits.
  // Also, the addressing mode can only represent shifts of 1, 2, or 3 bits.
  if (AMShiftAmt <= 0 || AMShiftAmt > 3) return true;

  // We also need to ensure that the mask is a contiguous run of bits.
  if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true;

  // Scale the leading zero count down based on the actual size of the value.
  // Also scale it down based on the size of the shift.
  MaskLZ -= (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt;

  // The final check is to ensure that any masked out high bits of X are
  // already known to be zero. Otherwise, the mask has a semantic impact
  // other than masking out a couple of low bits. Unfortunately, because of
  // the mask, zero extensions will be removed from operands in some cases.
  // This code works extra hard to look through extensions because we can
  // replace them with zero extensions cheaply if necessary.
  bool ReplacingAnyExtend = false;
  if (X.getOpcode() == ISD::ANY_EXTEND) {
    unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() -
                          X.getOperand(0).getSimpleValueType().getSizeInBits();
    // Assume that we'll replace the any-extend with a zero-extend, and
    // narrow the search to the extended value.
    X = X.getOperand(0);
    MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits;
    ReplacingAnyExtend = true;
  }
  APInt MaskedHighBits =
    APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ);
  APInt KnownZero, KnownOne;
  DAG.computeKnownBits(X, KnownZero, KnownOne);
  if (MaskedHighBits != KnownZero) return true;

  // We've identified a pattern that can be transformed into a single shift
  // and an addressing mode. Make it so.
  MVT VT = N.getSimpleValueType();
  if (ReplacingAnyExtend) {
    assert(X.getValueType() != VT);
    // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND.
    SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X);
    insertDAGNode(DAG, N, NewX);
    X = NewX;
  }
  SDLoc DL(N);
  SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8);
  SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt);
  SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8);
  SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt);

  // Insert the new nodes into the topological ordering. We must do this in
  // a valid topological ordering as nothing is going to go back and re-sort
  // these nodes. We continually insert before 'N' in sequence as this is
  // essentially a pre-flattened and pre-sorted sequence of nodes. There is no
  // hierarchy left to express.
  insertDAGNode(DAG, N, NewSRLAmt);
  insertDAGNode(DAG, N, NewSRL);
  insertDAGNode(DAG, N, NewSHLAmt);
  insertDAGNode(DAG, N, NewSHL);
  DAG.ReplaceAllUsesWith(N, NewSHL);

  AM.Scale = 1 << AMShiftAmt;
  AM.IndexReg = NewSRL;
  return false;
}
   1107 
   1108 bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
   1109                                               unsigned Depth) {
   1110   SDLoc dl(N);
   1111   DEBUG({
   1112       dbgs() << "MatchAddress: ";
   1113       AM.dump();
   1114     });
   1115   // Limit recursion.
   1116   if (Depth > 5)
   1117     return matchAddressBase(N, AM);
   1118 
   1119   // If this is already a %rip relative address, we can only merge immediates
   1120   // into it.  Instead of handling this in every case, we handle it here.
   1121   // RIP relative addressing: %rip + 32-bit displacement!
   1122   if (AM.isRIPRelative()) {
   1123     // FIXME: JumpTable and ExternalSymbol address currently don't like
   1124     // displacements.  It isn't very important, but this should be fixed for
   1125     // consistency.
   1126     if (!(AM.ES || AM.MCSym) && AM.JT != -1)
   1127       return true;
   1128 
   1129     if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
   1130       if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM))
   1131         return false;
   1132     return true;
   1133   }
   1134 
   1135   switch (N.getOpcode()) {
   1136   default: break;
   1137   case ISD::LOCAL_RECOVER: {
   1138     if (!AM.hasSymbolicDisplacement() && AM.Disp == 0)
   1139       if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) {
   1140         // Use the symbol and don't prefix it.
   1141         AM.MCSym = ESNode->getMCSymbol();
   1142         return false;
   1143       }
   1144     break;
   1145   }
   1146   case ISD::Constant: {
   1147     uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
   1148     if (!foldOffsetIntoAddress(Val, AM))
   1149       return false;
   1150     break;
   1151   }
   1152 
   1153   case X86ISD::Wrapper:
   1154   case X86ISD::WrapperRIP:
   1155     if (!matchWrapper(N, AM))
   1156       return false;
   1157     break;
   1158 
   1159   case ISD::LOAD:
   1160     if (!matchLoadInAddress(cast<LoadSDNode>(N), AM))
   1161       return false;
   1162     break;
   1163 
   1164   case ISD::FrameIndex:
   1165     if (AM.BaseType == X86ISelAddressMode::RegBase &&
   1166         AM.Base_Reg.getNode() == nullptr &&
   1167         (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
   1168       AM.BaseType = X86ISelAddressMode::FrameIndexBase;
   1169       AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
   1170       return false;
   1171     }
   1172     break;
   1173 
   1174   case ISD::SHL:
   1175     if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1)
   1176       break;
   1177 
   1178     if (ConstantSDNode
   1179           *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
   1180       unsigned Val = CN->getZExtValue();
   1181       // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
   1182       // that the base operand remains free for further matching. If
   1183       // the base doesn't end up getting used, a post-processing step
   1184       // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
   1185       if (Val == 1 || Val == 2 || Val == 3) {
   1186         AM.Scale = 1 << Val;
   1187         SDValue ShVal = N.getNode()->getOperand(0);
   1188 
   1189         // Okay, we know that we have a scale by now.  However, if the scaled
   1190         // value is an add of something and a constant, we can fold the
   1191         // constant into the disp field here.
   1192         if (CurDAG->isBaseWithConstantOffset(ShVal)) {
   1193           AM.IndexReg = ShVal.getNode()->getOperand(0);
   1194           ConstantSDNode *AddVal =
   1195             cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
   1196           uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val;
   1197           if (!foldOffsetIntoAddress(Disp, AM))
   1198             return false;
   1199         }
   1200 
   1201         AM.IndexReg = ShVal;
   1202         return false;
   1203       }
   1204     }
   1205     break;
   1206 
   1207   case ISD::SRL: {
   1208     // Scale must not be used already.
   1209     if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
   1210 
   1211     SDValue And = N.getOperand(0);
   1212     if (And.getOpcode() != ISD::AND) break;
   1213     SDValue X = And.getOperand(0);
   1214 
   1215     // We only handle up to 64-bit values here as those are what matter for
   1216     // addressing mode optimizations.
   1217     if (X.getSimpleValueType().getSizeInBits() > 64) break;
   1218 
   1219     // The mask used for the transform is expected to be post-shift, but we
   1220     // found the shift first so just apply the shift to the mask before passing
   1221     // it down.
   1222     if (!isa<ConstantSDNode>(N.getOperand(1)) ||
   1223         !isa<ConstantSDNode>(And.getOperand(1)))
   1224       break;
   1225     uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1);
   1226 
   1227     // Try to fold the mask and shift into the scale, and return false if we
   1228     // succeed.
   1229     if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM))
   1230       return false;
   1231     break;
   1232   }
   1233 
   1234   case ISD::SMUL_LOHI:
   1235   case ISD::UMUL_LOHI:
   1236     // A mul_lohi where we need the low part can be folded as a plain multiply.
   1237     if (N.getResNo() != 0) break;
   1238     // FALL THROUGH
   1239   case ISD::MUL:
   1240   case X86ISD::MUL_IMM:
   1241     // X*[3,5,9] -> X+X*[2,4,8]
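    // e.g. x*5 becomes base=x, index=x, scale=4, so the multiply can be
    // emitted as "leal (%eax,%eax,4), ..." rather than an imul.
    // (Register names here are illustrative only.)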
   1242     if (AM.BaseType == X86ISelAddressMode::RegBase &&
   1243         AM.Base_Reg.getNode() == nullptr &&
   1244         AM.IndexReg.getNode() == nullptr) {
      if (ConstantSDNode *CN =
              dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
   1247         if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
   1248             CN->getZExtValue() == 9) {
   1249           AM.Scale = unsigned(CN->getZExtValue())-1;
   1250 
   1251           SDValue MulVal = N.getNode()->getOperand(0);
   1252           SDValue Reg;
   1253 
   1254           // Okay, we know that we have a scale by now.  However, if the scaled
   1255           // value is an add of something and a constant, we can fold the
   1256           // constant into the disp field here.
   1257           if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
   1258               isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
   1259             Reg = MulVal.getNode()->getOperand(0);
   1260             ConstantSDNode *AddVal =
   1261               cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
   1262             uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
   1263             if (foldOffsetIntoAddress(Disp, AM))
   1264               Reg = N.getNode()->getOperand(0);
   1265           } else {
   1266             Reg = N.getNode()->getOperand(0);
   1267           }
   1268 
   1269           AM.IndexReg = AM.Base_Reg = Reg;
   1270           return false;
   1271         }
   1272     }
   1273     break;
   1274 
   1275   case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address with the
    // index field left unused, use -B as the index. This is a win if A has
    // multiple parts that can be folded into the address. It also saves a
    // mov if the base register has other uses, since it avoids a two-address
    // sub instruction; however, it costs an additional mov if the index
    // register has other uses.
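    // Roughly: for an address like "GV + x - y", GV and x fold into the
    // displacement and base, and y is negated into the index, giving
    // something like "negq %rdx; ... GV(%rcx,%rdx) ...". (Registers are
    // illustrative only.)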
   1282 
   1283     // Add an artificial use to this node so that we can keep track of
   1284     // it if it gets CSE'd with a different node.
   1285     HandleSDNode Handle(N);
   1286 
   1287     // Test if the LHS of the sub can be folded.
   1288     X86ISelAddressMode Backup = AM;
   1289     if (matchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
   1290       AM = Backup;
   1291       break;
   1292     }
   1293     // Test if the index field is free for use.
   1294     if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
   1295       AM = Backup;
   1296       break;
   1297     }
   1298 
   1299     int Cost = 0;
   1300     SDValue RHS = Handle.getValue().getNode()->getOperand(1);
   1301     // If the RHS involves a register with multiple uses, this
   1302     // transformation incurs an extra mov, due to the neg instruction
   1303     // clobbering its operand.
   1304     if (!RHS.getNode()->hasOneUse() ||
   1305         RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
   1306         RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
   1307         RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
   1308         (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
   1309          RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
   1310       ++Cost;
   1311     // If the base is a register with multiple uses, this
   1312     // transformation may save a mov.
   1313     if ((AM.BaseType == X86ISelAddressMode::RegBase &&
   1314          AM.Base_Reg.getNode() &&
   1315          !AM.Base_Reg.getNode()->hasOneUse()) ||
   1316         AM.BaseType == X86ISelAddressMode::FrameIndexBase)
   1317       --Cost;
   1318     // If the folded LHS was interesting, this transformation saves
   1319     // address arithmetic.
   1320     if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
   1321         ((AM.Disp != 0) && (Backup.Disp == 0)) +
   1322         (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
   1323       --Cost;
   1324     // If it doesn't look like it may be an overall win, don't do it.
   1325     if (Cost >= 0) {
   1326       AM = Backup;
   1327       break;
   1328     }
   1329 
   1330     // Ok, the transformation is legal and appears profitable. Go for it.
   1331     SDValue Zero = CurDAG->getConstant(0, dl, N.getValueType());
   1332     SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
   1333     AM.IndexReg = Neg;
   1334     AM.Scale = 1;
   1335 
   1336     // Insert the new nodes into the topological ordering.
   1337     insertDAGNode(*CurDAG, N, Zero);
   1338     insertDAGNode(*CurDAG, N, Neg);
   1339     return false;
   1340   }
   1341 
   1342   case ISD::ADD:
   1343     if (!matchAdd(N, AM, Depth))
   1344       return false;
   1345     break;
   1346 
   1347   case ISD::OR:
   1348     // We want to look through a transform in InstCombine and DAGCombiner that
   1349     // turns 'add' into 'or', so we can treat this 'or' exactly like an 'add'.
   1350     // Example: (or (and x, 1), (shl y, 3)) --> (add (and x, 1), (shl y, 3))
   1351     // An 'lea' can then be used to match the shift (multiply) and add:
   1352     // and $1, %esi
   1353     // lea (%rsi, %rdi, 8), %rax
   1354     if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) &&
   1355         !matchAdd(N, AM, Depth))
   1356       return false;
   1357     break;
   1358 
   1359   case ISD::AND: {
   1360     // Perform some heroic transforms on an and of a constant-count shift
   1361     // with a constant to enable use of the scaled offset field.
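    // For example, (and (shl x, 2), 252) is equivalent to (shl (and x, 63), 2)
    // since 252 == 63 << 2, and in that form the shift can be matched as a
    // scale of 4 (see foldMaskedShiftToScaledMask below).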
   1362 
   1363     // Scale must not be used already.
   1364     if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break;
   1365 
   1366     SDValue Shift = N.getOperand(0);
   1367     if (Shift.getOpcode() != ISD::SRL && Shift.getOpcode() != ISD::SHL) break;
   1368     SDValue X = Shift.getOperand(0);
   1369 
   1370     // We only handle up to 64-bit values here as those are what matter for
   1371     // addressing mode optimizations.
   1372     if (X.getSimpleValueType().getSizeInBits() > 64) break;
   1373 
   1374     if (!isa<ConstantSDNode>(N.getOperand(1)))
   1375       break;
   1376     uint64_t Mask = N.getConstantOperandVal(1);
   1377 
   1378     // Try to fold the mask and shift into an extract and scale.
   1379     if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM))
   1380       return false;
   1381 
   1382     // Try to fold the mask and shift directly into the scale.
   1383     if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM))
   1384       return false;
   1385 
   1386     // Try to swap the mask and shift to place shifts which can be done as
   1387     // a scale on the outside of the mask.
   1388     if (!foldMaskedShiftToScaledMask(*CurDAG, N, Mask, Shift, X, AM))
   1389       return false;
   1390     break;
   1391   }
   1392   }
   1393 
   1394   return matchAddressBase(N, AM);
   1395 }
   1396 
   1397 /// Helper for MatchAddress. Add the specified node to the
   1398 /// specified addressing mode without any further recursion.
   1399 bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) {
   1400   // Is the base register already occupied?
   1401   if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
   1402     // If so, check to see if the scale index register is set.
   1403     if (!AM.IndexReg.getNode()) {
   1404       AM.IndexReg = N;
   1405       AM.Scale = 1;
   1406       return false;
   1407     }
   1408 
   1409     // Otherwise, we cannot select it.
   1410     return true;
   1411   }
   1412 
  // By default, generate it as a register.
   1414   AM.BaseType = X86ISelAddressMode::RegBase;
   1415   AM.Base_Reg = N;
   1416   return false;
   1417 }
   1418 
bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                       SDValue &Scale, SDValue &Index,
                                       SDValue &Disp, SDValue &Segment) {
   1423   MaskedGatherScatterSDNode *Mgs = dyn_cast<MaskedGatherScatterSDNode>(Parent);
   1424   if (!Mgs)
   1425     return false;
   1426   X86ISelAddressMode AM;
   1427   unsigned AddrSpace = Mgs->getPointerInfo().getAddrSpace();
   1428   // AddrSpace 256 -> GS, 257 -> FS.
   1429   if (AddrSpace == 256)
   1430     AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
   1431   if (AddrSpace == 257)
   1432     AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
   1433 
   1434   SDLoc DL(N);
   1435   Base = Mgs->getBasePtr();
   1436   Index = Mgs->getIndex();
   1437   unsigned ScalarSize = Mgs->getValue().getValueType().getScalarSizeInBits();
   1438   Scale = getI8Imm(ScalarSize/8, DL);
   1439 
  // If the base is the constant 0, the whole address is in the index and the
  // scale is 1.
   1441   if (isa<ConstantSDNode>(Base)) {
   1442     assert(cast<ConstantSDNode>(Base)->isNullValue() &&
   1443            "Unexpected base in gather/scatter");
   1444     Scale = getI8Imm(1, DL);
   1445     Base = CurDAG->getRegister(0, MVT::i32);
   1446   }
   1447   if (AM.Segment.getNode())
   1448     Segment = AM.Segment;
   1449   else
   1450     Segment = CurDAG->getRegister(0, MVT::i32);
   1451   Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
   1452   return true;
   1453 }
   1454 
/// Returns true if it is able to pattern match an addressing mode.
/// It returns, by reference, the operands which make up the maximal
/// addressing mode it can match.
   1458 ///
   1459 /// Parent is the parent node of the addr operand that is being matched.  It
   1460 /// is always a load, store, atomic node, or null.  It is only null when
   1461 /// checking memory operands for inline asm nodes.
   1462 bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
   1463                                  SDValue &Scale, SDValue &Index,
   1464                                  SDValue &Disp, SDValue &Segment) {
   1465   X86ISelAddressMode AM;
   1466 
   1467   if (Parent &&
      // These opcodes all have an "addr:$ptr" operand but are not MemSDNodes,
      // and thus don't have proper addrspace info.
   1470       Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
   1471       Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
   1472       Parent->getOpcode() != X86ISD::TLSCALL && // Fixme
   1473       Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && // setjmp
   1474       Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { // longjmp
   1475     unsigned AddrSpace =
   1476       cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
   1477     // AddrSpace 256 -> GS, 257 -> FS.
   1478     if (AddrSpace == 256)
   1479       AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
   1480     if (AddrSpace == 257)
   1481       AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
   1482   }
   1483 
   1484   if (matchAddress(N, AM))
   1485     return false;
   1486 
   1487   MVT VT = N.getSimpleValueType();
   1488   if (AM.BaseType == X86ISelAddressMode::RegBase) {
   1489     if (!AM.Base_Reg.getNode())
   1490       AM.Base_Reg = CurDAG->getRegister(0, VT);
   1491   }
   1492 
   1493   if (!AM.IndexReg.getNode())
   1494     AM.IndexReg = CurDAG->getRegister(0, VT);
   1495 
   1496   getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
   1497   return true;
   1498 }
   1499 
   1500 /// Match a scalar SSE load. In particular, we want to match a load whose top
   1501 /// elements are either undef or zeros. The load flavor is derived from the
   1502 /// type of N, which is either v4f32 or v2f64.
   1503 ///
   1504 /// We also return:
   1505 ///   PatternChainNode: this is the matched node that has a chain input and
   1506 ///   output.
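/// For example, (v4f32 (scalar_to_vector (load p))) can be selected as
/// "movss (p), %xmm0": the scalar load fills element 0, and the upper
/// elements, which the pattern guarantees are undef or zero, are zeroed by
/// the instruction. (The concrete opcode is chosen by the .td patterns that
/// use this complex pattern, not here.)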
   1507 bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
   1508                                           SDValue N, SDValue &Base,
   1509                                           SDValue &Scale, SDValue &Index,
   1510                                           SDValue &Disp, SDValue &Segment,
   1511                                           SDValue &PatternNodeWithChain) {
   1512   if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   1513     PatternNodeWithChain = N.getOperand(0);
   1514     if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
   1515         PatternNodeWithChain.hasOneUse() &&
   1516         IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
   1517         IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
   1518       LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
   1519       if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
   1520         return false;
   1521       return true;
   1522     }
   1523   }
   1524 
   1525   // Also handle the case where we explicitly require zeros in the top
   1526   // elements.  This is a vector shuffle from the zero vector.
   1527   if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
   1528       // Check to see if the top elements are all zeros (or bitcast of zeros).
   1529       N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
   1530       N.getOperand(0).getNode()->hasOneUse() &&
   1531       ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
   1532       N.getOperand(0).getOperand(0).hasOneUse() &&
   1533       IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
   1534       IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
   1535     // Okay, this is a zero extending load.  Fold it.
   1536     LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
   1537     if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
   1538       return false;
   1539     PatternNodeWithChain = SDValue(LD, 0);
   1540     return true;
   1541   }
   1542   return false;
   1543 }
   1544 
   1545 
   1546 bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
   1547   if (const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
   1548     uint64_t ImmVal = CN->getZExtValue();
   1549     if ((uint32_t)ImmVal != (uint64_t)ImmVal)
   1550       return false;
   1551 
   1552     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i64);
   1553     return true;
   1554   }
   1555 
   1556   // In static codegen with small code model, we can get the address of a label
   1557   // into a register with 'movl'. TableGen has already made sure we're looking
   1558   // at a label of some kind.
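  // e.g. "movl $sym, %eax" leaves %rax == zext(&sym), which is only correct
  // when the code model guarantees the address fits in 32 bits (hence the
  // CodeModel::Small check below).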
   1559   assert(N->getOpcode() == X86ISD::Wrapper &&
   1560          "Unexpected node type for MOV32ri64");
   1561   N = N.getOperand(0);
   1562 
   1563   if (N->getOpcode() != ISD::TargetConstantPool &&
   1564       N->getOpcode() != ISD::TargetJumpTable &&
   1565       N->getOpcode() != ISD::TargetGlobalAddress &&
   1566       N->getOpcode() != ISD::TargetExternalSymbol &&
   1567       N->getOpcode() != ISD::MCSymbol &&
   1568       N->getOpcode() != ISD::TargetBlockAddress)
   1569     return false;
   1570 
   1571   Imm = N;
   1572   return TM.getCodeModel() == CodeModel::Small;
   1573 }
   1574 
   1575 bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
   1576                                          SDValue &Scale, SDValue &Index,
   1577                                          SDValue &Disp, SDValue &Segment) {
   1578   if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
   1579     return false;
   1580 
   1581   SDLoc DL(N);
   1582   RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base);
   1583   if (RN && RN->getReg() == 0)
   1584     Base = CurDAG->getRegister(0, MVT::i64);
  else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) {
   1586     // Base could already be %rip, particularly in the x32 ABI.
   1587     Base = SDValue(CurDAG->getMachineNode(
   1588                        TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
   1589                        CurDAG->getTargetConstant(0, DL, MVT::i64),
   1590                        Base,
   1591                        CurDAG->getTargetConstant(X86::sub_32bit, DL, MVT::i32)),
   1592                    0);
   1593   }
   1594 
   1595   RN = dyn_cast<RegisterSDNode>(Index);
   1596   if (RN && RN->getReg() == 0)
   1597     Index = CurDAG->getRegister(0, MVT::i64);
   1598   else {
   1599     assert(Index.getValueType() == MVT::i32 &&
   1600            "Expect to be extending 32-bit registers for use in LEA");
   1601     Index = SDValue(CurDAG->getMachineNode(
   1602                         TargetOpcode::SUBREG_TO_REG, DL, MVT::i64,
   1603                         CurDAG->getTargetConstant(0, DL, MVT::i64),
   1604                         Index,
   1605                         CurDAG->getTargetConstant(X86::sub_32bit, DL,
   1606                                                   MVT::i32)),
   1607                     0);
   1608   }
   1609 
   1610   return true;
   1611 }
   1612 
/// Calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost-effectively emitted as an LEA instruction.
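/// As a rough sketch of the heuristic below: a bare [reg] scores too low
/// (a plain MOV or ADD is better), while something like [reg + reg*4 + disp]
/// or a RIP-relative symbolic address scores high enough to be worth an LEA.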
   1615 bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
   1616                                     SDValue &Base, SDValue &Scale,
   1617                                     SDValue &Index, SDValue &Disp,
   1618                                     SDValue &Segment) {
   1619   X86ISelAddressMode AM;
   1620 
   1621   // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
   1622   // segments.
   1623   SDValue Copy = AM.Segment;
   1624   SDValue T = CurDAG->getRegister(0, MVT::i32);
   1625   AM.Segment = T;
   1626   if (matchAddress(N, AM))
   1627     return false;
  assert(T == AM.Segment);
   1629   AM.Segment = Copy;
   1630 
   1631   MVT VT = N.getSimpleValueType();
   1632   unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (AM.Base_Reg.getNode())
      Complexity = 1;
    else
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  } else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;
   1640 
   1641   if (AM.IndexReg.getNode())
   1642     Complexity++;
   1643   else
   1644     AM.IndexReg = CurDAG->getRegister(0, VT);
   1645 
  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or
  // to use a simple shift.
   1648   if (AM.Scale > 1)
   1649     Complexity++;
   1650 
  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // into an LEA. This was determined experimentally and is by no means
  // optimal (especially for code size considerations). LEA is nice because
  // of its three-address nature. Tweak the cost function again when we can
  // run convertToThreeAddress() at register allocation time.
   1656   if (AM.hasSymbolicDisplacement()) {
   1657     // For X86-64, always use LEA to materialize RIP-relative addresses.
   1658     if (Subtarget->is64Bit())
   1659       Complexity = 4;
   1660     else
   1661       Complexity += 2;
   1662   }
   1663 
   1664   if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
   1665     Complexity++;
   1666 
   1667   // If it isn't worth using an LEA, reject it.
   1668   if (Complexity <= 2)
   1669     return false;
   1670 
   1671   getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
   1672   return true;
   1673 }
   1674 
   1675 /// This is only run on TargetGlobalTLSAddress nodes.
   1676 bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
   1677                                         SDValue &Scale, SDValue &Index,
   1678                                         SDValue &Disp, SDValue &Segment) {
   1679   assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
   1680   const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
   1681 
   1682   X86ISelAddressMode AM;
   1683   AM.GV = GA->getGlobal();
   1684   AM.Disp += GA->getOffset();
   1685   AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
   1686   AM.SymbolFlags = GA->getTargetFlags();
   1687 
   1688   if (N.getValueType() == MVT::i32) {
   1689     AM.Scale = 1;
   1690     AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
   1691   } else {
   1692     AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
   1693   }
   1694 
   1695   getAddressOperands(AM, SDLoc(N), Base, Scale, Index, Disp, Segment);
   1696   return true;
   1697 }
   1698 
   1699 
   1700 bool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N,
   1701                                   SDValue &Base, SDValue &Scale,
   1702                                   SDValue &Index, SDValue &Disp,
   1703                                   SDValue &Segment) {
   1704   if (!ISD::isNON_EXTLoad(N.getNode()) ||
   1705       !IsProfitableToFold(N, P, P) ||
   1706       !IsLegalToFold(N, P, P, OptLevel))
   1707     return false;
   1708 
   1709   return selectAddr(N.getNode(),
   1710                     N.getOperand(1), Base, Scale, Index, Disp, Segment);
   1711 }
   1712 
   1713 /// Return an SDNode that returns the value of the global base register.
   1714 /// Output instructions required to initialize the global base register,
   1715 /// if necessary.
   1716 SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
   1717   unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
   1718   auto &DL = MF->getDataLayout();
   1719   return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode();
   1720 }
   1721 
   1722 /// Atomic opcode table
   1723 ///
   1724 enum AtomicOpc {
   1725   ADD,
   1726   SUB,
   1727   INC,
   1728   DEC,
   1729   OR,
   1730   AND,
   1731   XOR,
   1732   AtomicOpcEnd
   1733 };
   1734 
   1735 enum AtomicSz {
   1736   ConstantI8,
   1737   I8,
   1738   SextConstantI16,
   1739   ConstantI16,
   1740   I16,
   1741   SextConstantI32,
   1742   ConstantI32,
   1743   I32,
   1744   SextConstantI64,
   1745   ConstantI64,
   1746   I64,
   1747   AtomicSzEnd
   1748 };
   1749 
   1750 static const uint16_t AtomicOpcTbl[AtomicOpcEnd][AtomicSzEnd] = {
   1751   {
   1752     X86::LOCK_ADD8mi,
   1753     X86::LOCK_ADD8mr,
   1754     X86::LOCK_ADD16mi8,
   1755     X86::LOCK_ADD16mi,
   1756     X86::LOCK_ADD16mr,
   1757     X86::LOCK_ADD32mi8,
   1758     X86::LOCK_ADD32mi,
   1759     X86::LOCK_ADD32mr,
   1760     X86::LOCK_ADD64mi8,
   1761     X86::LOCK_ADD64mi32,
   1762     X86::LOCK_ADD64mr,
   1763   },
   1764   {
   1765     X86::LOCK_SUB8mi,
   1766     X86::LOCK_SUB8mr,
   1767     X86::LOCK_SUB16mi8,
   1768     X86::LOCK_SUB16mi,
   1769     X86::LOCK_SUB16mr,
   1770     X86::LOCK_SUB32mi8,
   1771     X86::LOCK_SUB32mi,
   1772     X86::LOCK_SUB32mr,
   1773     X86::LOCK_SUB64mi8,
   1774     X86::LOCK_SUB64mi32,
   1775     X86::LOCK_SUB64mr,
   1776   },
   1777   {
   1778     0,
   1779     X86::LOCK_INC8m,
   1780     0,
   1781     0,
   1782     X86::LOCK_INC16m,
   1783     0,
   1784     0,
   1785     X86::LOCK_INC32m,
   1786     0,
   1787     0,
   1788     X86::LOCK_INC64m,
   1789   },
   1790   {
   1791     0,
   1792     X86::LOCK_DEC8m,
   1793     0,
   1794     0,
   1795     X86::LOCK_DEC16m,
   1796     0,
   1797     0,
   1798     X86::LOCK_DEC32m,
   1799     0,
   1800     0,
   1801     X86::LOCK_DEC64m,
   1802   },
   1803   {
   1804     X86::LOCK_OR8mi,
   1805     X86::LOCK_OR8mr,
   1806     X86::LOCK_OR16mi8,
   1807     X86::LOCK_OR16mi,
   1808     X86::LOCK_OR16mr,
   1809     X86::LOCK_OR32mi8,
   1810     X86::LOCK_OR32mi,
   1811     X86::LOCK_OR32mr,
   1812     X86::LOCK_OR64mi8,
   1813     X86::LOCK_OR64mi32,
   1814     X86::LOCK_OR64mr,
   1815   },
   1816   {
   1817     X86::LOCK_AND8mi,
   1818     X86::LOCK_AND8mr,
   1819     X86::LOCK_AND16mi8,
   1820     X86::LOCK_AND16mi,
   1821     X86::LOCK_AND16mr,
   1822     X86::LOCK_AND32mi8,
   1823     X86::LOCK_AND32mi,
   1824     X86::LOCK_AND32mr,
   1825     X86::LOCK_AND64mi8,
   1826     X86::LOCK_AND64mi32,
   1827     X86::LOCK_AND64mr,
   1828   },
   1829   {
   1830     X86::LOCK_XOR8mi,
   1831     X86::LOCK_XOR8mr,
   1832     X86::LOCK_XOR16mi8,
   1833     X86::LOCK_XOR16mi,
   1834     X86::LOCK_XOR16mr,
   1835     X86::LOCK_XOR32mi8,
   1836     X86::LOCK_XOR32mi,
   1837     X86::LOCK_XOR32mr,
   1838     X86::LOCK_XOR64mi8,
   1839     X86::LOCK_XOR64mi32,
   1840     X86::LOCK_XOR64mr,
   1841   }
   1842 };
   1843 
   1844 // Return the target constant operand for atomic-load-op and do simple
   1845 // translations, such as from atomic-load-add to lock-sub. The return value is
   1846 // one of the following 3 cases:
   1847 // + target-constant, the operand could be supported as a target constant.
   1848 // + empty, the operand is not needed any more with the new op selected.
   1849 // + non-empty, otherwise.
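// For example, (atomic-load-add p, -1) is translated to a LOCK_DEC*m with no
// immediate operand (the empty case), assuming the subtarget does not have
// slow INC/DEC, while (atomic-load-add p, -5) becomes a LOCK_SUB*mi with the
// constant 5 (the target-constant case).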
   1850 static SDValue getAtomicLoadArithTargetConstant(SelectionDAG *CurDAG,
   1851                                                 SDLoc dl,
   1852                                                 enum AtomicOpc &Op, MVT NVT,
   1853                                                 SDValue Val,
   1854                                                 const X86Subtarget *Subtarget) {
   1855   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val)) {
   1856     int64_t CNVal = CN->getSExtValue();
   1857     // Quit if not 32-bit imm.
   1858     if ((int32_t)CNVal != CNVal)
   1859       return Val;
   1860     // Quit if INT32_MIN: it would be negated as it is negative and overflow,
   1861     // producing an immediate that does not fit in the 32 bits available for
   1862     // an immediate operand to sub. However, it still fits in 32 bits for the
   1863     // add (since it is not negated) so we can return target-constant.
   1864     if (CNVal == INT32_MIN)
   1865       return CurDAG->getTargetConstant(CNVal, dl, NVT);
   1866     // For atomic-load-add, we could do some optimizations.
   1867     if (Op == ADD) {
   1868       // Translate to INC/DEC if ADD by 1 or -1.
   1869       if (((CNVal == 1) || (CNVal == -1)) && !Subtarget->slowIncDec()) {
   1870         Op = (CNVal == 1) ? INC : DEC;
   1871         // No more constant operand after being translated into INC/DEC.
   1872         return SDValue();
   1873       }
   1874       // Translate to SUB if ADD by negative value.
   1875       if (CNVal < 0) {
   1876         Op = SUB;
   1877         CNVal = -CNVal;
   1878       }
   1879     }
   1880     return CurDAG->getTargetConstant(CNVal, dl, NVT);
   1881   }
   1882 
   1883   // If the value operand is single-used, try to optimize it.
   1884   if (Op == ADD && Val.hasOneUse()) {
   1885     // Translate (atomic-load-add ptr (sub 0 x)) back to (lock-sub x).
   1886     if (Val.getOpcode() == ISD::SUB && X86::isZeroNode(Val.getOperand(0))) {
   1887       Op = SUB;
   1888       return Val.getOperand(1);
   1889     }
   1890     // A special case for i16, which needs truncating as, in most cases, it's
   1891     // promoted to i32. We will translate
   1892     // (atomic-load-add (truncate (sub 0 x))) to (lock-sub (EXTRACT_SUBREG x))
   1893     if (Val.getOpcode() == ISD::TRUNCATE && NVT == MVT::i16 &&
   1894         Val.getOperand(0).getOpcode() == ISD::SUB &&
   1895         X86::isZeroNode(Val.getOperand(0).getOperand(0))) {
   1896       Op = SUB;
   1897       Val = Val.getOperand(0);
   1898       return CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl, NVT,
   1899                                             Val.getOperand(1));
   1900     }
   1901   }
   1902 
   1903   return Val;
   1904 }
   1905 
   1906 SDNode *X86DAGToDAGISel::selectAtomicLoadArith(SDNode *Node, MVT NVT) {
   1907   if (Node->hasAnyUseOfValue(0))
   1908     return nullptr;
   1909 
   1910   SDLoc dl(Node);
   1911 
   1912   // Optimize common patterns for __sync_or_and_fetch and similar arith
   1913   // operations where the result is not used. This allows us to use the "lock"
   1914   // version of the arithmetic instruction.
   1915   SDValue Chain = Node->getOperand(0);
   1916   SDValue Ptr = Node->getOperand(1);
   1917   SDValue Val = Node->getOperand(2);
   1918   SDValue Base, Scale, Index, Disp, Segment;
   1919   if (!selectAddr(Node, Ptr, Base, Scale, Index, Disp, Segment))
   1920     return nullptr;
   1921 
  // Select which row of the opcode table to use.
   1923   enum AtomicOpc Op;
   1924   switch (Node->getOpcode()) {
   1925     default:
   1926       return nullptr;
   1927     case ISD::ATOMIC_LOAD_OR:
   1928       Op = OR;
   1929       break;
   1930     case ISD::ATOMIC_LOAD_AND:
   1931       Op = AND;
   1932       break;
   1933     case ISD::ATOMIC_LOAD_XOR:
   1934       Op = XOR;
   1935       break;
   1936     case ISD::ATOMIC_LOAD_ADD:
   1937       Op = ADD;
   1938       break;
   1939   }
   1940 
   1941   Val = getAtomicLoadArithTargetConstant(CurDAG, dl, Op, NVT, Val, Subtarget);
   1942   bool isUnOp = !Val.getNode();
   1943   bool isCN = Val.getNode() && (Val.getOpcode() == ISD::TargetConstant);
   1944 
   1945   unsigned Opc = 0;
   1946   switch (NVT.SimpleTy) {
   1947     default: return nullptr;
   1948     case MVT::i8:
   1949       if (isCN)
   1950         Opc = AtomicOpcTbl[Op][ConstantI8];
   1951       else
   1952         Opc = AtomicOpcTbl[Op][I8];
   1953       break;
   1954     case MVT::i16:
   1955       if (isCN) {
   1956         if (immSext8(Val.getNode()))
   1957           Opc = AtomicOpcTbl[Op][SextConstantI16];
   1958         else
   1959           Opc = AtomicOpcTbl[Op][ConstantI16];
   1960       } else
   1961         Opc = AtomicOpcTbl[Op][I16];
   1962       break;
   1963     case MVT::i32:
   1964       if (isCN) {
   1965         if (immSext8(Val.getNode()))
   1966           Opc = AtomicOpcTbl[Op][SextConstantI32];
   1967         else
   1968           Opc = AtomicOpcTbl[Op][ConstantI32];
   1969       } else
   1970         Opc = AtomicOpcTbl[Op][I32];
   1971       break;
   1972     case MVT::i64:
   1973       if (isCN) {
   1974         if (immSext8(Val.getNode()))
   1975           Opc = AtomicOpcTbl[Op][SextConstantI64];
   1976         else if (i64immSExt32(Val.getNode()))
   1977           Opc = AtomicOpcTbl[Op][ConstantI64];
   1978         else
   1979           llvm_unreachable("True 64 bits constant in SelectAtomicLoadArith");
   1980       } else
   1981         Opc = AtomicOpcTbl[Op][I64];
   1982       break;
   1983   }
   1984 
   1985   assert(Opc != 0 && "Invalid arith lock transform!");
   1986 
   1987   // Building the new node.
   1988   SDValue Ret;
   1989   if (isUnOp) {
   1990     SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Chain };
   1991     Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
   1992   } else {
   1993     SDValue Ops[] = { Base, Scale, Index, Disp, Segment, Val, Chain };
   1994     Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops), 0);
   1995   }
   1996 
   1997   // Copying the MachineMemOperand.
   1998   MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
   1999   MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
   2000   cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
   2001 
   2002   // We need to have two outputs as that is what the original instruction had.
   2003   // So we add a dummy, undefined output. This is safe as we checked first
   2004   // that no-one uses our output anyway.
   2005   SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
   2006                                                  dl, NVT), 0);
   2007   SDValue RetVals[] = { Undef, Ret };
   2008   return CurDAG->getMergeValues(RetVals, dl).getNode();
   2009 }
   2010 
/// Return true if the given X86ISD::CMP node has no uses which require the
/// SF or OF bits to be accurate.
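/// For example, a CMP whose flags feed only JE/JNE or JA/JB style users never
/// reads SF or OF, so the caller may replace it with a form (such as a TEST
/// against a truncated constant) that can compute those bits differently.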
   2013 static bool hasNoSignedComparisonUses(SDNode *N) {
   2014   // Examine each user of the node.
   2015   for (SDNode::use_iterator UI = N->use_begin(),
   2016          UE = N->use_end(); UI != UE; ++UI) {
   2017     // Only examine CopyToReg uses.
   2018     if (UI->getOpcode() != ISD::CopyToReg)
   2019       return false;
   2020     // Only examine CopyToReg uses that copy to EFLAGS.
   2021     if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
   2022           X86::EFLAGS)
   2023       return false;
   2024     // Examine each user of the CopyToReg use.
   2025     for (SDNode::use_iterator FlagUI = UI->use_begin(),
   2026            FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
   2027       // Only examine the Flag result.
   2028       if (FlagUI.getUse().getResNo() != 1) continue;
   2029       // Anything unusual: assume conservatively.
   2030       if (!FlagUI->isMachineOpcode()) return false;
   2031       // Examine the opcode of the user.
   2032       switch (FlagUI->getMachineOpcode()) {
   2033       // These comparisons don't treat the most significant bit specially.
   2034       case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
   2035       case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
   2036       case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
   2037       case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
   2038       case X86::JA_1: case X86::JAE_1: case X86::JB_1: case X86::JBE_1:
   2039       case X86::JE_1: case X86::JNE_1: case X86::JP_1: case X86::JNP_1:
   2040       case X86::CMOVA16rr: case X86::CMOVA16rm:
   2041       case X86::CMOVA32rr: case X86::CMOVA32rm:
   2042       case X86::CMOVA64rr: case X86::CMOVA64rm:
   2043       case X86::CMOVAE16rr: case X86::CMOVAE16rm:
   2044       case X86::CMOVAE32rr: case X86::CMOVAE32rm:
   2045       case X86::CMOVAE64rr: case X86::CMOVAE64rm:
   2046       case X86::CMOVB16rr: case X86::CMOVB16rm:
   2047       case X86::CMOVB32rr: case X86::CMOVB32rm:
   2048       case X86::CMOVB64rr: case X86::CMOVB64rm:
   2049       case X86::CMOVBE16rr: case X86::CMOVBE16rm:
   2050       case X86::CMOVBE32rr: case X86::CMOVBE32rm:
   2051       case X86::CMOVBE64rr: case X86::CMOVBE64rm:
   2052       case X86::CMOVE16rr: case X86::CMOVE16rm:
   2053       case X86::CMOVE32rr: case X86::CMOVE32rm:
   2054       case X86::CMOVE64rr: case X86::CMOVE64rm:
   2055       case X86::CMOVNE16rr: case X86::CMOVNE16rm:
   2056       case X86::CMOVNE32rr: case X86::CMOVNE32rm:
   2057       case X86::CMOVNE64rr: case X86::CMOVNE64rm:
   2058       case X86::CMOVNP16rr: case X86::CMOVNP16rm:
   2059       case X86::CMOVNP32rr: case X86::CMOVNP32rm:
   2060       case X86::CMOVNP64rr: case X86::CMOVNP64rm:
   2061       case X86::CMOVP16rr: case X86::CMOVP16rm:
   2062       case X86::CMOVP32rr: case X86::CMOVP32rm:
   2063       case X86::CMOVP64rr: case X86::CMOVP64rm:
   2064         continue;
   2065       // Anything else: assume conservatively.
   2066       default: return false;
   2067       }
   2068     }
   2069   }
   2070   return true;
   2071 }
   2072 
/// Check whether or not the chain ending in StoreNode is suitable for the
/// {load; increment or decrement; store} read-modify-write transformation.
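/// e.g. the sequence "t = load [p]; t = t + 1; store t, [p]" can be folded
/// into a single "incl (p)" when the load and store address match and the
/// chain does not get in the way.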
static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,
                                SDValue StoredVal, SelectionDAG *CurDAG,
                                LoadSDNode *&LoadNode, SDValue &InputChain) {
  // Is the value stored the result of a DEC or INC?
  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false;

  // Is the stored value result 0 of the INC/DEC?
  if (StoredVal.getResNo() != 0) return false;

  // Is the store the only user of the INC/DEC result?
  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false;

  // Is the store non-extending and non-indexed?
  if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal())
   2090     return false;
   2091 
   2092   SDValue Load = StoredVal->getOperand(0);
   2093   // Is the stored value a non-extending and non-indexed load?
   2094   if (!ISD::isNormalLoad(Load.getNode())) return false;
   2095 
   2096   // Return LoadNode by reference.
   2097   LoadNode = cast<LoadSDNode>(Load);
  // Is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8)
   2099   EVT LdVT = LoadNode->getMemoryVT();
   2100   if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&
   2101       LdVT != MVT::i8)
   2102     return false;
   2103 
  // Is the INC/DEC the only user of the loaded value?
   2105   if (!Load.hasOneUse())
   2106     return false;
   2107 
   2108   // Is the address of the store the same as the load?
   2109   if (LoadNode->getBasePtr() != StoreNode->getBasePtr() ||
   2110       LoadNode->getOffset() != StoreNode->getOffset())
   2111     return false;
   2112 
   2113   // Check if the chain is produced by the load or is a TokenFactor with
   2114   // the load output chain as an operand. Return InputChain by reference.
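  // (The store's chain commonly looks like TokenFactor(LoadChain, Other...);
  // below we peel the load's chain out and, if needed, rebuild the
  // TokenFactor from the remaining operands.)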
   2115   SDValue Chain = StoreNode->getChain();
   2116 
   2117   bool ChainCheck = false;
   2118   if (Chain == Load.getValue(1)) {
   2119     ChainCheck = true;
   2120     InputChain = LoadNode->getChain();
   2121   } else if (Chain.getOpcode() == ISD::TokenFactor) {
   2122     SmallVector<SDValue, 4> ChainOps;
   2123     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) {
   2124       SDValue Op = Chain.getOperand(i);
   2125       if (Op == Load.getValue(1)) {
   2126         ChainCheck = true;
   2127         continue;
   2128       }
   2129 
   2130       // Make sure using Op as part of the chain would not cause a cycle here.
   2131       // In theory, we could check whether the chain node is a predecessor of
   2132       // the load. But that can be very expensive. Instead visit the uses and
   2133       // make sure they all have smaller node id than the load.
   2134       int LoadId = LoadNode->getNodeId();
      for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
             UE = Op.getNode()->use_end(); UI != UE; ++UI) {
   2137         if (UI.getUse().getResNo() != 0)
   2138           continue;
   2139         if (UI->getNodeId() > LoadId)
   2140           return false;
   2141       }
   2142 
   2143       ChainOps.push_back(Op);
   2144     }
   2145 
   2146     if (ChainCheck)
   2147       // Make a new TokenFactor with all the other input chains except
   2148       // for the load.
   2149       InputChain = CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain),
   2150                                    MVT::Other, ChainOps);
   2151   }
   2152   if (!ChainCheck)
   2153     return false;
   2154 
   2155   return true;
   2156 }
   2157 
   2158 /// Get the appropriate X86 opcode for an in-memory increment or decrement.
   2159 /// Opc should be X86ISD::DEC or X86ISD::INC.
   2160 static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) {
   2161   if (Opc == X86ISD::DEC) {
   2162     if (LdVT == MVT::i64) return X86::DEC64m;
   2163     if (LdVT == MVT::i32) return X86::DEC32m;
   2164     if (LdVT == MVT::i16) return X86::DEC16m;
   2165     if (LdVT == MVT::i8)  return X86::DEC8m;
   2166   } else {
   2167     assert(Opc == X86ISD::INC && "unrecognized opcode");
   2168     if (LdVT == MVT::i64) return X86::INC64m;
   2169     if (LdVT == MVT::i32) return X86::INC32m;
   2170     if (LdVT == MVT::i16) return X86::INC16m;
   2171     if (LdVT == MVT::i8)  return X86::INC8m;
   2172   }
   2173   llvm_unreachable("unrecognized size for LdVT");
   2174 }
   2175 
   2176 /// Customized ISel for GATHER operations.
   2177 SDNode *X86DAGToDAGISel::selectGather(SDNode *Node, unsigned Opc) {
   2178   // Operands of Gather: VSrc, Base, VIdx, VMask, Scale
   2179   SDValue Chain = Node->getOperand(0);
   2180   SDValue VSrc = Node->getOperand(2);
   2181   SDValue Base = Node->getOperand(3);
   2182   SDValue VIdx = Node->getOperand(4);
   2183   SDValue VMask = Node->getOperand(5);
   2184   ConstantSDNode *Scale = dyn_cast<ConstantSDNode>(Node->getOperand(6));
   2185   if (!Scale)
   2186     return nullptr;
   2187 
   2188   SDVTList VTs = CurDAG->getVTList(VSrc.getValueType(), VSrc.getValueType(),
   2189                                    MVT::Other);
   2190 
   2191   SDLoc DL(Node);
   2192 
   2193   // Memory Operands: Base, Scale, Index, Disp, Segment
   2194   SDValue Disp = CurDAG->getTargetConstant(0, DL, MVT::i32);
   2195   SDValue Segment = CurDAG->getRegister(0, MVT::i32);
   2196   const SDValue Ops[] = { VSrc, Base, getI8Imm(Scale->getSExtValue(), DL), VIdx,
   2197                           Disp, Segment, VMask, Chain};
   2198   SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops);
   2199   // Node has 2 outputs: VDst and MVT::Other.
   2200   // ResNode has 3 outputs: VDst, VMask_wb, and MVT::Other.
   2201   // We replace VDst of Node with VDst of ResNode, and Other of Node with Other
   2202   // of ResNode.
   2203   ReplaceUses(SDValue(Node, 0), SDValue(ResNode, 0));
   2204   ReplaceUses(SDValue(Node, 1), SDValue(ResNode, 2));
   2205   return ResNode;
   2206 }
   2207 
   2208 SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
   2209   MVT NVT = Node->getSimpleValueType(0);
   2210   unsigned Opc, MOpc;
   2211   unsigned Opcode = Node->getOpcode();
   2212   SDLoc dl(Node);
   2213 
   2214   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
   2215 
   2216   if (Node->isMachineOpcode()) {
   2217     DEBUG(dbgs() << "== ";  Node->dump(CurDAG); dbgs() << '\n');
   2218     Node->setNodeId(-1);
   2219     return nullptr;   // Already selected.
   2220   }
   2221 
   2222   switch (Opcode) {
   2223   default: break;
   2224   case ISD::BRIND: {
    if (Subtarget->isTargetNaCl())
      // NaCl has its own pass where jmp %r32 instructions are converted to
      // jmp %r64. We leave the instruction alone.
   2228       break;
   2229     if (Subtarget->isTarget64BitILP32()) {
   2230       // Converts a 32-bit register to a 64-bit, zero-extended version of
   2231       // it. This is needed because x86-64 can do many things, but jmp %r32
   2232       // ain't one of them.
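      // Roughly: "jmp *%eax" is re-emitted as a zero-extension of %eax
      // followed by "jmpq *%rax"; the zero-extension is often free since
      // 32-bit operations already clear the upper half of the register.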
   2233       const SDValue &Target = Node->getOperand(1);
   2234       assert(Target.getSimpleValueType() == llvm::MVT::i32);
   2235       SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, EVT(MVT::i64));
   2236       SDValue Brind = CurDAG->getNode(ISD::BRIND, dl, MVT::Other,
   2237                                       Node->getOperand(0), ZextTarget);
   2238       ReplaceUses(SDValue(Node, 0), Brind);
   2239       SelectCode(ZextTarget.getNode());
   2240       SelectCode(Brind.getNode());
   2241       return nullptr;
   2242     }
   2243     break;
   2244   }
   2245   case ISD::INTRINSIC_W_CHAIN: {
   2246     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
   2247     switch (IntNo) {
   2248     default: break;
   2249     case Intrinsic::x86_avx2_gather_d_pd:
   2250     case Intrinsic::x86_avx2_gather_d_pd_256:
   2251     case Intrinsic::x86_avx2_gather_q_pd:
   2252     case Intrinsic::x86_avx2_gather_q_pd_256:
   2253     case Intrinsic::x86_avx2_gather_d_ps:
   2254     case Intrinsic::x86_avx2_gather_d_ps_256:
   2255     case Intrinsic::x86_avx2_gather_q_ps:
   2256     case Intrinsic::x86_avx2_gather_q_ps_256:
   2257     case Intrinsic::x86_avx2_gather_d_q:
   2258     case Intrinsic::x86_avx2_gather_d_q_256:
   2259     case Intrinsic::x86_avx2_gather_q_q:
   2260     case Intrinsic::x86_avx2_gather_q_q_256:
   2261     case Intrinsic::x86_avx2_gather_d_d:
   2262     case Intrinsic::x86_avx2_gather_d_d_256:
   2263     case Intrinsic::x86_avx2_gather_q_d:
   2264     case Intrinsic::x86_avx2_gather_q_d_256: {
   2265       if (!Subtarget->hasAVX2())
   2266         break;
   2267       unsigned Opc;
   2268       switch (IntNo) {
   2269       default: llvm_unreachable("Impossible intrinsic");
   2270       case Intrinsic::x86_avx2_gather_d_pd:     Opc = X86::VGATHERDPDrm;  break;
   2271       case Intrinsic::x86_avx2_gather_d_pd_256: Opc = X86::VGATHERDPDYrm; break;
   2272       case Intrinsic::x86_avx2_gather_q_pd:     Opc = X86::VGATHERQPDrm;  break;
   2273       case Intrinsic::x86_avx2_gather_q_pd_256: Opc = X86::VGATHERQPDYrm; break;
   2274       case Intrinsic::x86_avx2_gather_d_ps:     Opc = X86::VGATHERDPSrm;  break;
   2275       case Intrinsic::x86_avx2_gather_d_ps_256: Opc = X86::VGATHERDPSYrm; break;
   2276       case Intrinsic::x86_avx2_gather_q_ps:     Opc = X86::VGATHERQPSrm;  break;
   2277       case Intrinsic::x86_avx2_gather_q_ps_256: Opc = X86::VGATHERQPSYrm; break;
   2278       case Intrinsic::x86_avx2_gather_d_q:      Opc = X86::VPGATHERDQrm;  break;
   2279       case Intrinsic::x86_avx2_gather_d_q_256:  Opc = X86::VPGATHERDQYrm; break;
   2280       case Intrinsic::x86_avx2_gather_q_q:      Opc = X86::VPGATHERQQrm;  break;
   2281       case Intrinsic::x86_avx2_gather_q_q_256:  Opc = X86::VPGATHERQQYrm; break;
   2282       case Intrinsic::x86_avx2_gather_d_d:      Opc = X86::VPGATHERDDrm;  break;
   2283       case Intrinsic::x86_avx2_gather_d_d_256:  Opc = X86::VPGATHERDDYrm; break;
   2284       case Intrinsic::x86_avx2_gather_q_d:      Opc = X86::VPGATHERQDrm;  break;
   2285       case Intrinsic::x86_avx2_gather_q_d_256:  Opc = X86::VPGATHERQDYrm; break;
   2286       }
   2287       SDNode *RetVal = selectGather(Node, Opc);
   2288       if (RetVal)
   2289         // We already called ReplaceUses inside SelectGather.
   2290         return nullptr;
   2291       break;
   2292     }
   2293     }
   2294     break;
   2295   }
   2296   case X86ISD::GlobalBaseReg:
   2297     return getGlobalBaseReg();
   2298 
   2299   case X86ISD::SHRUNKBLEND: {
   2300     // SHRUNKBLEND selects like a regular VSELECT.
   2301     SDValue VSelect = CurDAG->getNode(
   2302         ISD::VSELECT, SDLoc(Node), Node->getValueType(0), Node->getOperand(0),
   2303         Node->getOperand(1), Node->getOperand(2));
   2304     ReplaceUses(SDValue(Node, 0), VSelect);
   2305     SelectCode(VSelect.getNode());
   2306     // We already called ReplaceUses.
   2307     return nullptr;
   2308   }
   2309 
   2310   case ISD::ATOMIC_LOAD_XOR:
   2311   case ISD::ATOMIC_LOAD_AND:
   2312   case ISD::ATOMIC_LOAD_OR:
   2313   case ISD::ATOMIC_LOAD_ADD: {
   2314     SDNode *RetVal = selectAtomicLoadArith(Node, NVT);
   2315     if (RetVal)
   2316       return RetVal;
   2317     break;
   2318   }
   2319   case ISD::AND:
   2320   case ISD::OR:
   2321   case ISD::XOR: {
   2322     // For operations of the form (x << C1) op C2, check if we can use a smaller
   2323     // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
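    // For example (i64 case): (x << 8) & 0x7F00 can be re-emitted as
    // ((x & 0x7F) << 8); 0x7F fits in a sign-extended imm8, while 0x7F00
    // would need an imm32.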
   2324     SDValue N0 = Node->getOperand(0);
   2325     SDValue N1 = Node->getOperand(1);
   2326 
   2327     if (N0->getOpcode() != ISD::SHL || !N0->hasOneUse())
   2328       break;
   2329 
    // i8 is unshrinkable; i16 should be promoted to i32.
   2331     if (NVT != MVT::i32 && NVT != MVT::i64)
   2332       break;
   2333 
   2334     ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1);
   2335     ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(N0->getOperand(1));
   2336     if (!Cst || !ShlCst)
   2337       break;
   2338 
   2339     int64_t Val = Cst->getSExtValue();
   2340     uint64_t ShlVal = ShlCst->getZExtValue();
   2341 
    // Make sure that we don't change the operation by removing bits.
    // This only matters for OR and XOR; AND is unaffected.
   2344     uint64_t RemovedBitsMask = (1ULL << ShlVal) - 1;
   2345     if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0)
   2346       break;
   2347 
   2348     unsigned ShlOp, AddOp, Op;
   2349     MVT CstVT = NVT;
   2350 
   2351     // Check the minimum bitwidth for the new constant.
   2352     // TODO: AND32ri is the same as AND64ri32 with zext imm.
   2353     // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
   2354     // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
   2355     if (!isInt<8>(Val) && isInt<8>(Val >> ShlVal))
   2356       CstVT = MVT::i8;
   2357     else if (!isInt<32>(Val) && isInt<32>(Val >> ShlVal))
   2358       CstVT = MVT::i32;
   2359 
   2360     // Bail if there is no smaller encoding.
   2361     if (NVT == CstVT)
   2362       break;
   2363 
   2364     switch (NVT.SimpleTy) {
   2365     default: llvm_unreachable("Unsupported VT!");
   2366     case MVT::i32:
   2367       assert(CstVT == MVT::i8);
   2368       ShlOp = X86::SHL32ri;
   2369       AddOp = X86::ADD32rr;
   2370 
   2371       switch (Opcode) {
   2372       default: llvm_unreachable("Impossible opcode");
   2373       case ISD::AND: Op = X86::AND32ri8; break;
   2374       case ISD::OR:  Op =  X86::OR32ri8; break;
   2375       case ISD::XOR: Op = X86::XOR32ri8; break;
   2376       }
   2377       break;
   2378     case MVT::i64:
   2379       assert(CstVT == MVT::i8 || CstVT == MVT::i32);
   2380       ShlOp = X86::SHL64ri;
   2381       AddOp = X86::ADD64rr;
   2382 
   2383       switch (Opcode) {
   2384       default: llvm_unreachable("Impossible opcode");
   2385       case ISD::AND: Op = CstVT==MVT::i8? X86::AND64ri8 : X86::AND64ri32; break;
   2386       case ISD::OR:  Op = CstVT==MVT::i8?  X86::OR64ri8 :  X86::OR64ri32; break;
   2387       case ISD::XOR: Op = CstVT==MVT::i8? X86::XOR64ri8 : X86::XOR64ri32; break;
   2388       }
   2389       break;
   2390     }
   2391 
   2392     // Emit the smaller op and the shift.
   2393     SDValue NewCst = CurDAG->getTargetConstant(Val >> ShlVal, dl, CstVT);
   2394     SDNode *New = CurDAG->getMachineNode(Op, dl, NVT, N0->getOperand(0),NewCst);
   2395     if (ShlVal == 1)
   2396       return CurDAG->SelectNodeTo(Node, AddOp, NVT, SDValue(New, 0),
   2397                                   SDValue(New, 0));
   2398     return CurDAG->SelectNodeTo(Node, ShlOp, NVT, SDValue(New, 0),
   2399                                 getI8Imm(ShlVal, dl));
   2400   }
   2401   case X86ISD::UMUL8:
   2402   case X86ISD::SMUL8: {
   2403     SDValue N0 = Node->getOperand(0);
   2404     SDValue N1 = Node->getOperand(1);
   2405 
   2406     Opc = (Opcode == X86ISD::SMUL8 ? X86::IMUL8r : X86::MUL8r);
   2407 
   2408     SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::AL,
   2409                                           N0, SDValue()).getValue(1);
   2410 
   2411     SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32);
   2412     SDValue Ops[] = {N1, InFlag};
   2413     SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
   2414 
   2415     ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
   2416     ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
   2417     return nullptr;
   2418   }
   2419 
   2420   case X86ISD::UMUL: {
   2421     SDValue N0 = Node->getOperand(0);
   2422     SDValue N1 = Node->getOperand(1);
   2423 
   2424     unsigned LoReg;
   2425     switch (NVT.SimpleTy) {
   2426     default: llvm_unreachable("Unsupported VT!");
   2427     case MVT::i8:  LoReg = X86::AL;  Opc = X86::MUL8r; break;
   2428     case MVT::i16: LoReg = X86::AX;  Opc = X86::MUL16r; break;
   2429     case MVT::i32: LoReg = X86::EAX; Opc = X86::MUL32r; break;
   2430     case MVT::i64: LoReg = X86::RAX; Opc = X86::MUL64r; break;
   2431     }
   2432 
   2433     SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
   2434                                           N0, SDValue()).getValue(1);
   2435 
   2436     SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::i32);
   2437     SDValue Ops[] = {N1, InFlag};
   2438     SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
   2439 
   2440     ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0));
   2441     ReplaceUses(SDValue(Node, 1), SDValue(CNode, 1));
   2442     ReplaceUses(SDValue(Node, 2), SDValue(CNode, 2));
   2443     return nullptr;
   2444   }
   2445 
   2446   case ISD::SMUL_LOHI:
   2447   case ISD::UMUL_LOHI: {
   2448     SDValue N0 = Node->getOperand(0);
   2449     SDValue N1 = Node->getOperand(1);
   2450 
   2451     bool isSigned = Opcode == ISD::SMUL_LOHI;
   2452     bool hasBMI2 = Subtarget->hasBMI2();
   2453     if (!isSigned) {
   2454       switch (NVT.SimpleTy) {
   2455       default: llvm_unreachable("Unsupported VT!");
   2456       case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
   2457       case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
   2458       case MVT::i32: Opc = hasBMI2 ? X86::MULX32rr : X86::MUL32r;
   2459                      MOpc = hasBMI2 ? X86::MULX32rm : X86::MUL32m; break;
   2460       case MVT::i64: Opc = hasBMI2 ? X86::MULX64rr : X86::MUL64r;
   2461                      MOpc = hasBMI2 ? X86::MULX64rm : X86::MUL64m; break;
   2462       }
   2463     } else {
   2464       switch (NVT.SimpleTy) {
   2465       default: llvm_unreachable("Unsupported VT!");
   2466       case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
   2467       case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
   2468       case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
   2469       case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
   2470       }
   2471     }
   2472 
   2473     unsigned SrcReg, LoReg, HiReg;
   2474     switch (Opc) {
   2475     default: llvm_unreachable("Unknown MUL opcode!");
   2476     case X86::IMUL8r:
   2477     case X86::MUL8r:
   2478       SrcReg = LoReg = X86::AL; HiReg = X86::AH;
   2479       break;
   2480     case X86::IMUL16r:
   2481     case X86::MUL16r:
   2482       SrcReg = LoReg = X86::AX; HiReg = X86::DX;
   2483       break;
   2484     case X86::IMUL32r:
   2485     case X86::MUL32r:
   2486       SrcReg = LoReg = X86::EAX; HiReg = X86::EDX;
   2487       break;
   2488     case X86::IMUL64r:
   2489     case X86::MUL64r:
   2490       SrcReg = LoReg = X86::RAX; HiReg = X86::RDX;
   2491       break;
   2492     case X86::MULX32rr:
   2493       SrcReg = X86::EDX; LoReg = HiReg = 0;
   2494       break;
   2495     case X86::MULX64rr:
   2496       SrcReg = X86::RDX; LoReg = HiReg = 0;
   2497       break;
   2498     }
   2499 
   2500     SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
   2501     bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative.
   2503     if (!foldedLoad) {
   2504       foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
   2505       if (foldedLoad)
   2506         std::swap(N0, N1);
   2507     }

    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, SrcReg,
                                          N0, SDValue()).getValue(1);
    SDValue ResHi, ResLo;

    if (foldedLoad) {
      SDValue Chain;
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      if (MOpc == X86::MULX32rm || MOpc == X86::MULX64rm) {
        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other, MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
        ResHi = SDValue(CNode, 0);
        ResLo = SDValue(CNode, 1);
        Chain = SDValue(CNode, 2);
        InFlag = SDValue(CNode, 3);
      } else {
        SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops);
        Chain = SDValue(CNode, 0);
        InFlag = SDValue(CNode, 1);
      }

      // Update the chain.
      ReplaceUses(N1.getValue(1), Chain);
    } else {
      SDValue Ops[] = { N1, InFlag };
      if (Opc == X86::MULX32rr || Opc == X86::MULX64rr) {
        SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
        ResHi = SDValue(CNode, 0);
        ResLo = SDValue(CNode, 1);
        InFlag = SDValue(CNode, 2);
      } else {
        SDVTList VTs = CurDAG->getVTList(MVT::Glue);
        SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops);
        InFlag = SDValue(CNode, 0);
      }
    }

    // Prevent use of AH in a REX instruction by referencing AX instead.
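    // (With a REX prefix, byte-register encodings 4-7 select SPL/BPL/SIL/DIL
    // rather than AH/CH/DH/BH, so AH is simply unencodable in a REX
    // instruction; we therefore copy all of AX out and split it ourselves.)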
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);
      // Get the low part if needed. Don't use getCopyFromReg for aliasing
      // registers.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 0),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX down 8 bits.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                                     CurDAG->getTargetConstant(8, dl, MVT::i8)),
                       0);
      // Then truncate it down to i8.
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }
    // Copy the low half of the result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      if (!ResLo.getNode()) {
        assert(LoReg && "Register for low half is not defined!");
        ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, NVT,
                                       InFlag);
        InFlag = ResLo.getValue(2);
      }
      ReplaceUses(SDValue(Node, 0), ResLo);
      DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the high half of the result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      if (!ResHi.getNode()) {
        assert(HiReg && "Register for high half is not defined!");
        ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, NVT,
                                       InFlag);
        InFlag = ResHi.getValue(2);
      }
      ReplaceUses(SDValue(Node, 1), ResHi);
      DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); dbgs() << '\n');
    }

    return nullptr;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM:
  case X86ISD::SDIVREM8_SEXT_HREG:
  case X86ISD::UDIVREM8_ZEXT_HREG: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = (Opcode == ISD::SDIVREM ||
                     Opcode == X86ISD::SDIVREM8_SEXT_HREG);
    if (!isSigned) {
      switch (NVT.SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    } else {
      switch (NVT.SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }
    }

    unsigned LoReg, HiReg, ClrReg;
    unsigned SExtOpcode;
    switch (NVT.SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  ClrReg = HiReg = X86::AH;
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  ClrReg = HiReg = X86::DX;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
      SExtOpcode = X86::CQO;
      break;
    }
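    // Hardware division takes its dividend in the HiReg:LoReg pair (e.g.
    // DX:AX), leaving the quotient in LoReg and the remainder in HiReg;
    // CBW/CWD/CDQ/CQO sign-extend LoReg into the high half for the signed
    // forms. The i8 form is special: the dividend lives entirely in AX, with
    // the quotient returned in AL and the remainder in AH.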

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    bool signBitIsZero = CurDAG->SignBitIsZero(N0);
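    // If the sign bit of the dividend is known to be zero, signed and
    // unsigned division produce identical results, so the cheaper
    // zero-extension setup below can be used even for SDIVREM.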

    SDValue InFlag;
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
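      // Roughly (illustrative register assignments):
      //   movzbl %cl, %eax   # zero-extends the dividend, clearing AH
      //   divb   %bl         # AL = quotient, AH = remainder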
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
                                         MVT::Other, Ops), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0), 0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag), 0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDValue ClrNode =
            SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, NVT), 0);
        switch (NVT.SimpleTy) {
        case MVT::i16:
          ClrNode =
              SDValue(CurDAG->getMachineNode(
                          TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode,
                          CurDAG->getTargetConstant(X86::sub_16bit, dl,
                                                    MVT::i32)),
                      0);
          break;
        case MVT::i32:
          break;
        case MVT::i64:
          ClrNode =
              SDValue(CurDAG->getMachineNode(
                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
                          CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode,
                          CurDAG->getTargetConstant(X86::sub_32bit, dl,
                                                    MVT::i32)),
                      0);
          break;
        default:
          llvm_unreachable("Unexpected division source");
        }
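        // MOV32r0 is a zeroing pseudo (it later expands to a 32-bit xor),
        // so the i16 case extracts the low subregister, while the i64 case
        // only needs SUBREG_TO_REG because a write to a 32-bit register
        // implicitly zeroes the upper 32 bits.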

        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops);
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
    }

    // Prevent use of AH in a REX instruction by explicitly copying it to
    // an ABCD_L register.
    //
    // The current assumption of the register allocator is that isel
    // won't generate explicit references to the GR8_ABCD_H registers. If
    // the allocator and/or the backend get enhanced to be more robust in
    // that regard, this can be, and should be, removed.
    if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) {
      SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8);
      unsigned AHExtOpcode =
          isSigned ? X86::MOVSX32_NOREXrr8 : X86::MOVZX32_NOREXrr8;

      SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32,
                                             MVT::Glue, AHCopy, InFlag);
      SDValue Result(RNode, 0);
      InFlag = SDValue(RNode, 1);

      if (Opcode == X86ISD::UDIVREM8_ZEXT_HREG ||
          Opcode == X86ISD::SDIVREM8_SEXT_HREG) {
        if (Node->getValueType(1) == MVT::i64) {
          // It's not possible to movsx AH directly into a 64-bit register,
          // because the latter would need a REX prefix, which AH can't have.
          assert(Opcode != X86ISD::SDIVREM8_SEXT_HREG &&
                 "Unexpected i64 sext of h-register");
          Result =
              SDValue(CurDAG->getMachineNode(
                          TargetOpcode::SUBREG_TO_REG, dl, MVT::i64,
                          CurDAG->getTargetConstant(0, dl, MVT::i64), Result,
                          CurDAG->getTargetConstant(X86::sub_32bit, dl,
                                                    MVT::i32)),
                      0);
        }
      } else {
        Result =
            CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result);
      }
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the division (low) result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the remainder (high) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    return nullptr;
  }

  case X86ISD::CMP:
  case X86ISD::SUB: {
    // Sometimes a SUB is used to perform a comparison.
    if (Opcode == X86ISD::SUB && Node->hasAnyUseOfValue(0))
      // This node is not a CMP.
      break;
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look past the truncate if CMP is the only use of it.
    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
        hasNoSignedComparisonUses(Node))
      N0 = N0.getOperand(0);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding.
    if ((N0.getNode()->getOpcode() == ISD::AND ||
         (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) &&
        N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;

      // For example, convert "testl %eax, $8" to "testb %al, $8"
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           hasNoSignedComparisonUses(Node))) {
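        // (The 0x80 guard: a byte test computes SF from bit 7 instead of
        // the wide operand's sign bit, so narrowing is only safe when that
        // bit is clear or no user performs a signed comparison.)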
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl, MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          const TargetRegisterClass *TRC;
          switch (N0.getSimpleValueType().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
                                                 Subreg, Imm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return nullptr;
      }

      // For example, convert "testl %eax, $2048" to "testb %ah, $8".
      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
          (!(C->getZExtValue() & 0x8000) ||
           hasNoSignedComparisonUses(Node))) {
        // Shift the immediate right by 8 bits.
        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
                                                       dl, MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Put the value in an ABCD register.
        const TargetRegisterClass *TRC;
        switch (N0.getSimpleValueType().SimpleTy) {
        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
        default: llvm_unreachable("Unsupported TEST operand type!");
        }
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i32);
        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                             Reg.getValueType(), Reg, RC), 0);

        // Extract the h-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.  The EXTRACT_SUBREG becomes a COPY that can only
        // target GR8_NOREX registers, so make sure the register class is
        // forced.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST8ri_NOREX, dl,
                                                 MVT::i32, Subreg, ShiftedImm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return nullptr;
      }

      // For example, convert "testl %eax, $32776" to "testw %ax, $32776".
      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
          N0.getValueType() != MVT::i16 &&
          (!(C->getZExtValue() & 0x8000) ||
           hasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
                                                MVT::i16);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 16-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
                                                        MVT::i16, Reg);

        // Emit a testw.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32,
                                                 Subreg, Imm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return nullptr;
      }

      // For example, convert "testq %rax, $268468232" to
      // "testl %eax, $268468232".
      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
          N0.getValueType() == MVT::i64 &&
          (!(C->getZExtValue() & 0x80000000) ||
           hasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), dl,
                                                MVT::i32);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 32-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.
        SDNode *NewNode = CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32,
                                                 Subreg, Imm);
        // Replace SUB|CMP with TEST, since SUB has two outputs while TEST has
        // one, do not call ReplaceAllUsesWith.
        ReplaceUses(SDValue(Node, (Opcode == X86ISD::SUB ? 1 : 0)),
                    SDValue(NewNode, 0));
        return nullptr;
      }
    }
    break;
  }
  case ISD::STORE: {
    // Change a chain of {load; incr or dec; store} of the same value into
    // a simple increment or decrement through memory of that value, if the
    // uses of the modified value and its address are suitable.
    //
    // The DEC64m tablegen pattern is currently not able to match the case
    // where the EFLAGS on the original DEC are used. (This also applies to
    // {INC,DEC}X{64,32,16,8}.)
    //
    // We'll need to improve tablegen to allow flags to be transferred from a
    // node in the pattern to the result node, probably with a new keyword.
    // For example, we currently have this:
    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
    //   (implicit EFLAGS)]>;
    // but we may need something like this:
    // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",
    //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),
    //   (transferrable EFLAGS)]>;
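    //
    // In asm terms, the transform performed here is, e.g.:
    //   movq (%rdi), %rax ; decq %rax ; movq %rax, (%rdi)  ==>  decq (%rdi)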

    StoreSDNode *StoreNode = cast<StoreSDNode>(Node);
    SDValue StoredVal = StoreNode->getOperand(1);
    unsigned Opc = StoredVal->getOpcode();

    LoadSDNode *LoadNode = nullptr;
    SDValue InputChain;
    if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal, CurDAG,
                             LoadNode, InputChain))
      break;

    SDValue Base, Scale, Index, Disp, Segment;
    if (!selectAddr(LoadNode, LoadNode->getBasePtr(),
                    Base, Scale, Index, Disp, Segment))
      break;

    MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(2);
    MemOp[0] = StoreNode->getMemOperand();
    MemOp[1] = LoadNode->getMemOperand();
    const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain };
    EVT LdVT = LoadNode->getMemoryVT();
    unsigned newOpc = getFusedLdStOpcode(LdVT, Opc);
    MachineSDNode *Result = CurDAG->getMachineNode(newOpc,
                                                   SDLoc(Node),
                                                   MVT::i32, MVT::Other, Ops);
    Result->setMemRefs(MemOp, MemOp + 2);

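    // Result #0 is the new EFLAGS value and result #1 is the chain: rewire
    // the store's chain users to the latter and the arithmetic node's flag
    // users to the former.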
    ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1));
    ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0));

    return Result;
  }
  }

  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == nullptr || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << '\n');

  return ResNode;
}

bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::Constraint_i:
    // FIXME: It seems strange that 'i' is needed here since it's supposed to
    //        be an immediate and not a memory constraint.
    // Fallthrough.
  case InlineAsm::Constraint_o: // offsetable        ??
  case InlineAsm::Constraint_v: // not offsetable    ??
  case InlineAsm::Constraint_m: // memory
  case InlineAsm::Constraint_X:
    if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

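  // A matched x86 memory operand is always the five-operand tuple
  // (base, scale, index, displacement, segment) produced by selectAddr.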
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// This pass converts a legalized DAG into an X86-specific DAG,
/// ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}