      1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the X86-specific support for the FastISel class. Much
     11 // of the target-specific code is generated by tablegen in the file
     12 // X86GenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "X86.h"
     17 #include "X86CallingConv.h"
     18 #include "X86InstrBuilder.h"
     19 #include "X86InstrInfo.h"
     20 #include "X86MachineFunctionInfo.h"
     21 #include "X86RegisterInfo.h"
     22 #include "X86Subtarget.h"
     23 #include "X86TargetMachine.h"
     24 #include "llvm/Analysis/BranchProbabilityInfo.h"
     25 #include "llvm/CodeGen/Analysis.h"
     26 #include "llvm/CodeGen/FastISel.h"
     27 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     28 #include "llvm/CodeGen/MachineConstantPool.h"
     29 #include "llvm/CodeGen/MachineFrameInfo.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/IR/CallSite.h"
     32 #include "llvm/IR/CallingConv.h"
     33 #include "llvm/IR/DerivedTypes.h"
     34 #include "llvm/IR/GetElementPtrTypeIterator.h"
     35 #include "llvm/IR/GlobalAlias.h"
     36 #include "llvm/IR/GlobalVariable.h"
     37 #include "llvm/IR/Instructions.h"
     38 #include "llvm/IR/IntrinsicInst.h"
     39 #include "llvm/IR/Operator.h"
     40 #include "llvm/MC/MCAsmInfo.h"
     41 #include "llvm/Support/ErrorHandling.h"
     42 #include "llvm/Target/TargetOptions.h"
     43 using namespace llvm;
     44 
     45 namespace {
     46 
     47 class X86FastISel final : public FastISel {
     48   /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
     49   /// make the right decision when generating code for different targets.
     50   const X86Subtarget *Subtarget;
     51 
     52   /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE and x87
     53   /// floating-point ops.
     54   /// When SSE is available, use it for f32 operations.
     55   /// When SSE2 is available, use it for f64 operations.
     56   bool X86ScalarSSEf64;
     57   bool X86ScalarSSEf32;
     58 
     59 public:
     60   explicit X86FastISel(FunctionLoweringInfo &funcInfo,
     61                        const TargetLibraryInfo *libInfo)
     62       : FastISel(funcInfo, libInfo) {
     63     Subtarget = &funcInfo.MF->getSubtarget<X86Subtarget>();
     64     X86ScalarSSEf64 = Subtarget->hasSSE2();
     65     X86ScalarSSEf32 = Subtarget->hasSSE1();
     66   }
     67 
     68   bool fastSelectInstruction(const Instruction *I) override;
     69 
     70   /// \brief The specified machine instr operand is a vreg, and that
     71   /// vreg is being provided by the specified load instruction.  If possible,
     72   /// try to fold the load as an operand to the instruction, returning true
     73   /// on success.
     74   bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
     75                            const LoadInst *LI) override;
     76 
     77   bool fastLowerArguments() override;
     78   bool fastLowerCall(CallLoweringInfo &CLI) override;
     79   bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
     80 
     81 #include "X86GenFastISel.inc"
     82 
     83 private:
     84   bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT, DebugLoc DL);
     85 
     86   bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO,
     87                        unsigned &ResultReg, unsigned Alignment = 1);
     88 
     89   bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
     90                         MachineMemOperand *MMO = nullptr, bool Aligned = false);
     91   bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
     92                         const X86AddressMode &AM,
     93                         MachineMemOperand *MMO = nullptr, bool Aligned = false);
     94 
     95   bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
     96                          unsigned &ResultReg);
     97 
     98   bool X86SelectAddress(const Value *V, X86AddressMode &AM);
     99   bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
    100 
    101   bool X86SelectLoad(const Instruction *I);
    102 
    103   bool X86SelectStore(const Instruction *I);
    104 
    105   bool X86SelectRet(const Instruction *I);
    106 
    107   bool X86SelectCmp(const Instruction *I);
    108 
    109   bool X86SelectZExt(const Instruction *I);
    110 
    111   bool X86SelectBranch(const Instruction *I);
    112 
    113   bool X86SelectShift(const Instruction *I);
    114 
    115   bool X86SelectDivRem(const Instruction *I);
    116 
    117   bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I);
    118 
    119   bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I);
    120 
    121   bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I);
    122 
    123   bool X86SelectSelect(const Instruction *I);
    124 
    125   bool X86SelectTrunc(const Instruction *I);
    126 
    127   bool X86SelectFPExtOrFPTrunc(const Instruction *I, unsigned Opc,
    128                                const TargetRegisterClass *RC);
    129 
    130   bool X86SelectFPExt(const Instruction *I);
    131   bool X86SelectFPTrunc(const Instruction *I);
    132   bool X86SelectSIToFP(const Instruction *I);
    133 
    134   const X86InstrInfo *getInstrInfo() const {
    135     return Subtarget->getInstrInfo();
    136   }
    137   const X86TargetMachine *getTargetMachine() const {
    138     return static_cast<const X86TargetMachine *>(&TM);
    139   }
    140 
    141   bool handleConstantAddresses(const Value *V, X86AddressMode &AM);
    142 
    143   unsigned X86MaterializeInt(const ConstantInt *CI, MVT VT);
    144   unsigned X86MaterializeFP(const ConstantFP *CFP, MVT VT);
    145   unsigned X86MaterializeGV(const GlobalValue *GV, MVT VT);
    146   unsigned fastMaterializeConstant(const Constant *C) override;
    147 
    148   unsigned fastMaterializeAlloca(const AllocaInst *C) override;
    149 
    150   unsigned fastMaterializeFloatZero(const ConstantFP *CF) override;
    151 
    152   /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
    153   /// computed in an SSE register, not on the X87 floating point stack.
    154   bool isScalarFPTypeInSSEReg(EVT VT) const {
    155     return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
    156       (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
    157   }
    158 
    159   bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
    160 
    161   bool IsMemcpySmall(uint64_t Len);
    162 
    163   bool TryEmitSmallMemcpy(X86AddressMode DestAM,
    164                           X86AddressMode SrcAM, uint64_t Len);
    165 
    166   bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
    167                             const Value *Cond);
    168 };
    169 
    170 } // end anonymous namespace.
    171 
    172 static std::pair<X86::CondCode, bool>
    173 getX86ConditionCode(CmpInst::Predicate Predicate) {
    174   X86::CondCode CC = X86::COND_INVALID;
    175   bool NeedSwap = false;
    176   switch (Predicate) {
    177   default: break;
    178   // Floating-point Predicates
    179   case CmpInst::FCMP_UEQ: CC = X86::COND_E;       break;
    180   case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
    181   case CmpInst::FCMP_OGT: CC = X86::COND_A;       break;
    182   case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
    183   case CmpInst::FCMP_OGE: CC = X86::COND_AE;      break;
    184   case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
    185   case CmpInst::FCMP_ULT: CC = X86::COND_B;       break;
    186   case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
    187   case CmpInst::FCMP_ULE: CC = X86::COND_BE;      break;
    188   case CmpInst::FCMP_ONE: CC = X86::COND_NE;      break;
    189   case CmpInst::FCMP_UNO: CC = X86::COND_P;       break;
    190   case CmpInst::FCMP_ORD: CC = X86::COND_NP;      break;
    191   case CmpInst::FCMP_OEQ: // fall-through
    192   case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
    193 
    194   // Integer Predicates
    195   case CmpInst::ICMP_EQ:  CC = X86::COND_E;       break;
    196   case CmpInst::ICMP_NE:  CC = X86::COND_NE;      break;
    197   case CmpInst::ICMP_UGT: CC = X86::COND_A;       break;
    198   case CmpInst::ICMP_UGE: CC = X86::COND_AE;      break;
    199   case CmpInst::ICMP_ULT: CC = X86::COND_B;       break;
    200   case CmpInst::ICMP_ULE: CC = X86::COND_BE;      break;
    201   case CmpInst::ICMP_SGT: CC = X86::COND_G;       break;
    202   case CmpInst::ICMP_SGE: CC = X86::COND_GE;      break;
    203   case CmpInst::ICMP_SLT: CC = X86::COND_L;       break;
    204   case CmpInst::ICMP_SLE: CC = X86::COND_LE;      break;
    205   }
    206 
    207   return std::make_pair(CC, NeedSwap);
    208 }
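
        // Illustrative examples of the mapping above (hypothetical IR, shown only
        // to clarify the table):
        //   icmp sgt i32 %a, %b    --> COND_G, operands unchanged
        //   fcmp olt double %a, %b --> operands swapped, then COND_A, because
        //                              ucomisd sets the flags like an unsigned
        //                              compare and olt(a, b) == ogt(b, a)
        //   fcmp oeq / fcmp une    --> COND_INVALID; callers must combine two
        //                              condition checks (see X86SelectCmp and
        //                              X86SelectBranch below).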
    209 
    210 static std::pair<unsigned, bool>
    211 getX86SSEConditionCode(CmpInst::Predicate Predicate) {
    212   unsigned CC;
    213   bool NeedSwap = false;
    214 
    215   // SSE Condition code mapping:
    216   //  0 - EQ
    217   //  1 - LT
    218   //  2 - LE
    219   //  3 - UNORD
    220   //  4 - NEQ
    221   //  5 - NLT
    222   //  6 - NLE
    223   //  7 - ORD
    224   switch (Predicate) {
    225   default: llvm_unreachable("Unexpected predicate");
    226   case CmpInst::FCMP_OEQ: CC = 0;          break;
    227   case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
    228   case CmpInst::FCMP_OLT: CC = 1;          break;
    229   case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
    230   case CmpInst::FCMP_OLE: CC = 2;          break;
    231   case CmpInst::FCMP_UNO: CC = 3;          break;
    232   case CmpInst::FCMP_UNE: CC = 4;          break;
    233   case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
    234   case CmpInst::FCMP_UGE: CC = 5;          break;
    235   case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
    236   case CmpInst::FCMP_UGT: CC = 6;          break;
    237   case CmpInst::FCMP_ORD: CC = 7;          break;
    238   case CmpInst::FCMP_UEQ:
    239   case CmpInst::FCMP_ONE: CC = 8;          break;
    240   }
    241 
    242   return std::make_pair(CC, NeedSwap);
    243 }
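
        // For example (illustrative): "fcmp oge double %a, %b" swaps its operands
        // and uses condition code 2 (LE), since oge(a, b) is equivalent to
        // ole(b, a); the codes in the table above correspond to the predicate
        // immediates of the SSE CMPSS/CMPSD compare instructions.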
    244 
    245 /// \brief Check if it is possible to fold the condition from the XALU intrinsic
    246 /// into the user. The condition code will only be updated on success.
    247 bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I,
    248                                        const Value *Cond) {
    249   if (!isa<ExtractValueInst>(Cond))
    250     return false;
    251 
    252   const auto *EV = cast<ExtractValueInst>(Cond);
    253   if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
    254     return false;
    255 
    256   const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
    257   MVT RetVT;
    258   const Function *Callee = II->getCalledFunction();
    259   Type *RetTy =
    260     cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
    261   if (!isTypeLegal(RetTy, RetVT))
    262     return false;
    263 
    264   if (RetVT != MVT::i32 && RetVT != MVT::i64)
    265     return false;
    266 
    267   X86::CondCode TmpCC;
    268   switch (II->getIntrinsicID()) {
    269   default: return false;
    270   case Intrinsic::sadd_with_overflow:
    271   case Intrinsic::ssub_with_overflow:
    272   case Intrinsic::smul_with_overflow:
    273   case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break;
    274   case Intrinsic::uadd_with_overflow:
    275   case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break;
    276   }
    277 
    278   // Check if both instructions are in the same basic block.
    279   if (II->getParent() != I->getParent())
    280     return false;
    281 
    282   // Make sure nothing is in the way
    283   BasicBlock::const_iterator Start = I;
    284   BasicBlock::const_iterator End = II;
    285   for (auto Itr = std::prev(Start); Itr != End; --Itr) {
    286     // We only expect extractvalue instructions between the intrinsic and the
    287     // instruction to be selected.
    288     if (!isa<ExtractValueInst>(Itr))
    289       return false;
    290 
    291     // Check that the extractvalue operand comes from the intrinsic.
    292     const auto *EVI = cast<ExtractValueInst>(Itr);
    293     if (EVI->getAggregateOperand() != II)
    294       return false;
    295   }
    296 
    297   CC = TmpCC;
    298   return true;
    299 }
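
        // A shape of code this fold recognizes (hypothetical IR, i32 operands):
        //   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
        //   %sum = extractvalue { i32, i1 } %res, 0
        //   %ovf = extractvalue { i32, i1 } %res, 1
        //   br i1 %ovf, label %overflow, label %cont
        // The branch can then test COND_O directly off the flags produced by the
        // add, as long as the intrinsic and its user are in the same block and
        // only extractvalues of that intrinsic sit between them.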
    300 
    301 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
    302   EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
    303   if (evt == MVT::Other || !evt.isSimple())
    304     // Unhandled type. Halt "fast" selection and bail.
    305     return false;
    306 
    307   VT = evt.getSimpleVT();
    308   // For now, require SSE/SSE2 for performing floating-point operations,
    309   // since x87 requires additional work.
    310   if (VT == MVT::f64 && !X86ScalarSSEf64)
    311     return false;
    312   if (VT == MVT::f32 && !X86ScalarSSEf32)
    313     return false;
    314   // Similarly, no f80 support yet.
    315   if (VT == MVT::f80)
    316     return false;
    317   // We only handle legal types. For example, on x86-32 the instruction
    318   // selector contains all of the 64-bit instructions from x86-64,
    319   // under the assumption that i64 won't be used if the target doesn't
    320   // support it.
    321   return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
    322 }
    323 
    324 #include "X86GenCallingConv.inc"
    325 
    326 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
    327 /// The address is given by the pre-computed address mode AM.
    328 /// Return true and the result register by reference if it is possible.
    329 bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
    330                                   MachineMemOperand *MMO, unsigned &ResultReg,
    331                                   unsigned Alignment) {
    332   // Get opcode and regclass of the output for the given load instruction.
    333   unsigned Opc = 0;
    334   const TargetRegisterClass *RC = nullptr;
    335   switch (VT.getSimpleVT().SimpleTy) {
    336   default: return false;
    337   case MVT::i1:
    338   case MVT::i8:
    339     Opc = X86::MOV8rm;
    340     RC  = &X86::GR8RegClass;
    341     break;
    342   case MVT::i16:
    343     Opc = X86::MOV16rm;
    344     RC  = &X86::GR16RegClass;
    345     break;
    346   case MVT::i32:
    347     Opc = X86::MOV32rm;
    348     RC  = &X86::GR32RegClass;
    349     break;
    350   case MVT::i64:
    351     // Must be in x86-64 mode.
    352     Opc = X86::MOV64rm;
    353     RC  = &X86::GR64RegClass;
    354     break;
    355   case MVT::f32:
    356     if (X86ScalarSSEf32) {
    357       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
    358       RC  = &X86::FR32RegClass;
    359     } else {
    360       Opc = X86::LD_Fp32m;
    361       RC  = &X86::RFP32RegClass;
    362     }
    363     break;
    364   case MVT::f64:
    365     if (X86ScalarSSEf64) {
    366       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
    367       RC  = &X86::FR64RegClass;
    368     } else {
    369       Opc = X86::LD_Fp64m;
    370       RC  = &X86::RFP64RegClass;
    371     }
    372     break;
    373   case MVT::f80:
    374     // No f80 support yet.
    375     return false;
    376   case MVT::v4f32:
    377     if (Alignment >= 16)
    378       Opc = Subtarget->hasAVX() ? X86::VMOVAPSrm : X86::MOVAPSrm;
    379     else
    380       Opc = Subtarget->hasAVX() ? X86::VMOVUPSrm : X86::MOVUPSrm;
    381     RC  = &X86::VR128RegClass;
    382     break;
    383   case MVT::v2f64:
    384     if (Alignment >= 16)
    385       Opc = Subtarget->hasAVX() ? X86::VMOVAPDrm : X86::MOVAPDrm;
    386     else
    387       Opc = Subtarget->hasAVX() ? X86::VMOVUPDrm : X86::MOVUPDrm;
    388     RC  = &X86::VR128RegClass;
    389     break;
    390   case MVT::v4i32:
    391   case MVT::v2i64:
    392   case MVT::v8i16:
    393   case MVT::v16i8:
    394     if (Alignment >= 16)
    395       Opc = Subtarget->hasAVX() ? X86::VMOVDQArm : X86::MOVDQArm;
    396     else
    397       Opc = Subtarget->hasAVX() ? X86::VMOVDQUrm : X86::MOVDQUrm;
    398     RC  = &X86::VR128RegClass;
    399     break;
    400   }
    401 
    402   ResultReg = createResultReg(RC);
    403   MachineInstrBuilder MIB =
    404     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
    405   addFullAddress(MIB, AM);
    406   if (MMO)
    407     MIB->addMemOperand(*FuncInfo.MF, MMO);
    408   return true;
    409 }
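
        // For example (illustrative): an i32 load through a register base becomes a
        // single MOV32rm, a 16-byte-aligned <4 x float> load becomes MOVAPSrm (or
        // VMOVAPSrm with AVX), and an underaligned vector load falls back to the
        // unaligned MOVUPSrm form.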
    410 
    411 /// X86FastEmitStore - Emit a machine instruction to store a value of
    412 /// type VT. The address is given by the pre-computed address mode AM, which
    413 /// may consist of a base, a scaled index, a displacement, and/or a
    414 /// GlobalAddress. Return true if it is possible.
    415 bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
    416                                    const X86AddressMode &AM,
    417                                    MachineMemOperand *MMO, bool Aligned) {
    418   // Get opcode and regclass of the output for the given store instruction.
    419   unsigned Opc = 0;
    420   switch (VT.getSimpleVT().SimpleTy) {
    421   case MVT::f80: // No f80 support yet.
    422   default: return false;
    423   case MVT::i1: {
    424     // Mask out all but lowest bit.
    425     unsigned AndResult = createResultReg(&X86::GR8RegClass);
    426     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
    427             TII.get(X86::AND8ri), AndResult)
    428       .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
    429     ValReg = AndResult;
    430   }
    431   // FALLTHROUGH, handling i1 as i8.
    432   case MVT::i8:  Opc = X86::MOV8mr;  break;
    433   case MVT::i16: Opc = X86::MOV16mr; break;
    434   case MVT::i32: Opc = X86::MOV32mr; break;
    435   case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
    436   case MVT::f32:
    437     Opc = X86ScalarSSEf32 ?
    438           (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
    439     break;
    440   case MVT::f64:
    441     Opc = X86ScalarSSEf64 ?
    442           (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
    443     break;
    444   case MVT::v4f32:
    445     if (Aligned)
    446       Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
    447     else
    448       Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    449     break;
    450   case MVT::v2f64:
    451     if (Aligned)
    452       Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
    453     else
    454       Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
    455     break;
    456   case MVT::v4i32:
    457   case MVT::v2i64:
    458   case MVT::v8i16:
    459   case MVT::v16i8:
    460     if (Aligned)
    461       Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
    462     else
    463       Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
    464     break;
    465   }
    466 
    467   MachineInstrBuilder MIB =
    468     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
    469   addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill));
    470   if (MMO)
    471     MIB->addMemOperand(*FuncInfo.MF, MMO);
    472 
    473   return true;
    474 }
    475 
    476 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
    477                                    const X86AddressMode &AM,
    478                                    MachineMemOperand *MMO, bool Aligned) {
    479   // Handle 'null' like i32/i64 0.
    480   if (isa<ConstantPointerNull>(Val))
    481     Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext()));
    482 
    483   // If this is a store of a simple constant, fold the constant into the store.
    484   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    485     unsigned Opc = 0;
    486     bool Signed = true;
    487     switch (VT.getSimpleVT().SimpleTy) {
    488     default: break;
    489     case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
    490     case MVT::i8:  Opc = X86::MOV8mi;  break;
    491     case MVT::i16: Opc = X86::MOV16mi; break;
    492     case MVT::i32: Opc = X86::MOV32mi; break;
    493     case MVT::i64:
    494       // Must be a 32-bit sign extended value.
    495       if (isInt<32>(CI->getSExtValue()))
    496         Opc = X86::MOV64mi32;
    497       break;
    498     }
    499 
    500     if (Opc) {
    501       MachineInstrBuilder MIB =
    502         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
    503       addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue()
    504                                             : CI->getZExtValue());
    505       if (MMO)
    506         MIB->addMemOperand(*FuncInfo.MF, MMO);
    507       return true;
    508     }
    509   }
    510 
    511   unsigned ValReg = getRegForValue(Val);
    512   if (ValReg == 0)
    513     return false;
    514 
    515   bool ValKill = hasTrivialKill(Val);
    516   return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned);
    517 }
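
        // For example (illustrative): "store i32 42, i32* %p" folds the constant
        // into a single MOV32mi; a store of a null pointer is treated as storing an
        // integer zero of pointer width; anything else is materialized into a
        // register first and stored through the register form above.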
    518 
    519 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
    520 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
    521 /// ISD::SIGN_EXTEND).
    522 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
    523                                     unsigned Src, EVT SrcVT,
    524                                     unsigned &ResultReg) {
    525   unsigned RR = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
    526                            Src, /*TODO: Kill=*/false);
    527   if (RR == 0)
    528     return false;
    529 
    530   ResultReg = RR;
    531   return true;
    532 }
    533 
    534 bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) {
    535   // Handle constant address.
    536   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    537     // Can't handle alternate code models yet.
    538     if (TM.getCodeModel() != CodeModel::Small)
    539       return false;
    540 
    541     // Can't handle TLS yet.
    542     if (GV->isThreadLocal())
    543       return false;
    544 
    545     // RIP-relative addresses can't have additional register operands, so if
    546     // we've already folded stuff into the addressing mode, just force the
    547     // global value into its own register, which we can use as the basereg.
    548     if (!Subtarget->isPICStyleRIPRel() ||
    549         (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
    550       // Okay, we've committed to selecting this global. Set up the address.
    551       AM.GV = GV;
    552 
    553       // Allow the subtarget to classify the global.
    554       unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
    555 
    556       // If this reference is relative to the pic base, set it now.
    557       if (isGlobalRelativeToPICBase(GVFlags)) {
    558         // FIXME: How do we know Base.Reg is free??
    559         AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    560       }
    561 
    562       // Unless the ABI requires an extra load, return a direct reference to
    563       // the global.
    564       if (!isGlobalStubReference(GVFlags)) {
    565         if (Subtarget->isPICStyleRIPRel()) {
    566           // Use rip-relative addressing if we can.  Above we verified that the
    567           // base and index registers are unused.
    568           assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    569           AM.Base.Reg = X86::RIP;
    570         }
    571         AM.GVOpFlags = GVFlags;
    572         return true;
    573       }
    574 
    575       // Ok, we need to do a load from a stub.  If we've already loaded from
    576       // this stub, reuse the loaded pointer, otherwise emit the load now.
    577       DenseMap<const Value *, unsigned>::iterator I = LocalValueMap.find(V);
    578       unsigned LoadReg;
    579       if (I != LocalValueMap.end() && I->second != 0) {
    580         LoadReg = I->second;
    581       } else {
    582         // Issue load from stub.
    583         unsigned Opc = 0;
    584         const TargetRegisterClass *RC = nullptr;
    585         X86AddressMode StubAM;
    586         StubAM.Base.Reg = AM.Base.Reg;
    587         StubAM.GV = GV;
    588         StubAM.GVOpFlags = GVFlags;
    589 
    590         // Prepare for inserting code in the local-value area.
    591         SavePoint SaveInsertPt = enterLocalValueArea();
    592 
    593         if (TLI.getPointerTy() == MVT::i64) {
    594           Opc = X86::MOV64rm;
    595           RC  = &X86::GR64RegClass;
    596 
    597           if (Subtarget->isPICStyleRIPRel())
    598             StubAM.Base.Reg = X86::RIP;
    599         } else {
    600           Opc = X86::MOV32rm;
    601           RC  = &X86::GR32RegClass;
    602         }
    603 
    604         LoadReg = createResultReg(RC);
    605         MachineInstrBuilder LoadMI =
    606           BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), LoadReg);
    607         addFullAddress(LoadMI, StubAM);
    608 
    609         // Ok, back to normal mode.
    610         leaveLocalValueArea(SaveInsertPt);
    611 
    612         // Prevent loading the GV stub multiple times in the same MBB.
    613         LocalValueMap[V] = LoadReg;
    614       }
    615 
    616       // Now construct the final address. Note that the Disp, Scale,
    617       // and Index values may already be set here.
    618       AM.Base.Reg = LoadReg;
    619       AM.GV = nullptr;
    620       return true;
    621     }
    622   }
    623 
    624   // If all else fails, try to materialize the value in a register.
    625   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    626     if (AM.Base.Reg == 0) {
    627       AM.Base.Reg = getRegForValue(V);
    628       return AM.Base.Reg != 0;
    629     }
    630     if (AM.IndexReg == 0) {
    631       assert(AM.Scale == 1 && "Scale with no index!");
    632       AM.IndexReg = getRegForValue(V);
    633       return AM.IndexReg != 0;
    634     }
    635   }
    636 
    637   return false;
    638 }
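
        // For example (illustrative): under a PIC style that requires a stub load
        // (isGlobalStubReference), the global's address is first loaded from the
        // stub with a MOV32rm/MOV64rm emitted in the local-value area, the loaded
        // pointer is cached in LocalValueMap so later uses in the same MBB reuse
        // it, and the final address then uses that register as its base.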
    639 
    640 /// X86SelectAddress - Attempt to fill in an address from the given value.
    641 ///
    642 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
    643   SmallVector<const Value *, 32> GEPs;
    644 redo_gep:
    645   const User *U = nullptr;
    646   unsigned Opcode = Instruction::UserOp1;
    647   if (const Instruction *I = dyn_cast<Instruction>(V)) {
    648     // Don't walk into other basic blocks; it's possible we haven't
    649     // visited them yet, so the instructions may not yet be assigned
    650     // virtual registers.
    651     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
    652         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    653       Opcode = I->getOpcode();
    654       U = I;
    655     }
    656   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    657     Opcode = C->getOpcode();
    658     U = C;
    659   }
    660 
    661   if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    662     if (Ty->getAddressSpace() > 255)
    663       // Fast instruction selection doesn't support the special
    664       // address spaces.
    665       return false;
    666 
    667   switch (Opcode) {
    668   default: break;
    669   case Instruction::BitCast:
    670     // Look past bitcasts.
    671     return X86SelectAddress(U->getOperand(0), AM);
    672 
    673   case Instruction::IntToPtr:
    674     // Look past no-op inttoptrs.
    675     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
    676       return X86SelectAddress(U->getOperand(0), AM);
    677     break;
    678 
    679   case Instruction::PtrToInt:
    680     // Look past no-op ptrtoints.
    681     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
    682       return X86SelectAddress(U->getOperand(0), AM);
    683     break;
    684 
    685   case Instruction::Alloca: {
    686     // Do static allocas.
    687     const AllocaInst *A = cast<AllocaInst>(V);
    688     DenseMap<const AllocaInst *, int>::iterator SI =
    689       FuncInfo.StaticAllocaMap.find(A);
    690     if (SI != FuncInfo.StaticAllocaMap.end()) {
    691       AM.BaseType = X86AddressMode::FrameIndexBase;
    692       AM.Base.FrameIndex = SI->second;
    693       return true;
    694     }
    695     break;
    696   }
    697 
    698   case Instruction::Add: {
    699     // Adds of constants are common and easy enough.
    700     if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
    701       uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
    702       // They have to fit in the 32-bit signed displacement field though.
    703       if (isInt<32>(Disp)) {
    704         AM.Disp = (uint32_t)Disp;
    705         return X86SelectAddress(U->getOperand(0), AM);
    706       }
    707     }
    708     break;
    709   }
    710 
    711   case Instruction::GetElementPtr: {
    712     X86AddressMode SavedAM = AM;
    713 
    714     // Pattern-match simple GEPs.
    715     uint64_t Disp = (int32_t)AM.Disp;
    716     unsigned IndexReg = AM.IndexReg;
    717     unsigned Scale = AM.Scale;
    718     gep_type_iterator GTI = gep_type_begin(U);
    719     // Iterate through the indices, folding what we can. Constants can be
    720     // folded, and one dynamic index can be handled, if the scale is supported.
    721     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
    722          i != e; ++i, ++GTI) {
    723       const Value *Op = *i;
    724       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    725         const StructLayout *SL = DL.getStructLayout(STy);
    726         Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
    727         continue;
    728       }
    729 
    730       // An array/variable index is always of the form i*S where S is the
    731       // constant scale size.  See if we can push the scale into immediates.
    732       uint64_t S = DL.getTypeAllocSize(GTI.getIndexedType());
    733       for (;;) {
    734         if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    735           // Constant-offset addressing.
    736           Disp += CI->getSExtValue() * S;
    737           break;
    738         }
    739         if (canFoldAddIntoGEP(U, Op)) {
    740           // A compatible add with a constant operand. Fold the constant.
    741           ConstantInt *CI =
    742             cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    743           Disp += CI->getSExtValue() * S;
    744           // Iterate on the other operand.
    745           Op = cast<AddOperator>(Op)->getOperand(0);
    746           continue;
    747         }
    748         if (IndexReg == 0 &&
    749             (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
    750             (S == 1 || S == 2 || S == 4 || S == 8)) {
    751           // Scaled-index addressing.
    752           Scale = S;
    753           IndexReg = getRegForGEPIndex(Op).first;
    754           if (IndexReg == 0)
    755             return false;
    756           break;
    757         }
    758         // Unsupported.
    759         goto unsupported_gep;
    760       }
    761     }
    762 
    763     // Check for displacement overflow.
    764     if (!isInt<32>(Disp))
    765       break;
    766 
    767     AM.IndexReg = IndexReg;
    768     AM.Scale = Scale;
    769     AM.Disp = (uint32_t)Disp;
    770     GEPs.push_back(V);
    771 
    772     if (const GetElementPtrInst *GEP =
    773           dyn_cast<GetElementPtrInst>(U->getOperand(0))) {
    774       // Ok, the GEP indices were covered by constant-offset and scaled-index
    775       // addressing. Update the address state and move on to examining the base.
    776       V = GEP;
    777       goto redo_gep;
    778     } else if (X86SelectAddress(U->getOperand(0), AM)) {
    779       return true;
    780     }
    781 
    782     // If we couldn't merge the gep value into this addr mode, revert to our
    783     // saved address and just match the value instead of completely failing.
    784     AM = SavedAM;
    785 
    786     for (SmallVectorImpl<const Value *>::reverse_iterator
    787            I = GEPs.rbegin(), E = GEPs.rend(); I != E; ++I)
    788       if (handleConstantAddresses(*I, AM))
    789         return true;
    790 
    791     return false;
    792   unsupported_gep:
    793     // Ok, the GEP indices weren't all covered.
    794     break;
    795   }
    796   }
    797 
    798   return handleConstantAddresses(V, AM);
    799 }
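
        // An address this routine can fold completely (hypothetical IR, assuming
        // %buf is a static alloca of [16 x i32] and %i is an i32 defined in this
        // block):
        //   %p = getelementptr [16 x i32]* %buf, i32 0, i32 %i
        // becomes a frame-index base with IndexReg = the vreg of %i, Scale = 4 and
        // Disp = 0, so the eventual memory operand is a single [FI + %i*4].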
    800 
    801 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
    802 ///
    803 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
    804   const User *U = nullptr;
    805   unsigned Opcode = Instruction::UserOp1;
    806   const Instruction *I = dyn_cast<Instruction>(V);
    807   // Record if the value is defined in the same basic block.
    808   //
    809   // This information is crucial to know whether or not folding an
    810   // operand is valid.
    811   // Indeed, FastISel generates or reuses a virtual register for all
    812   // operands of all instructions it selects. Obviously, the definition and
    813   // its uses must use the same virtual register otherwise the produced
    814   // code is incorrect.
    815   // Before instruction selection, FunctionLoweringInfo::set sets the virtual
    816   // registers for values that are alive across basic blocks. This ensures
    817   // that the values are set consistently across basic blocks, even
    818   // if different instruction selection mechanisms are used (e.g., a mix of
    819   // SDISel and FastISel).
    820   // For values local to a basic block, the instruction selection process
    821   // generates these virtual registers with whatever method is appropriate
    822   // for its needs. In particular, FastISel and SDISel do not share the way
    823   // local virtual registers are set.
    824   // Therefore, it is impossible (or at least unsafe) to share values
    825   // between basic blocks unless they use the same instruction selection
    826   // method, which is not guaranteed for X86.
    827   // Moreover, things like hasOneUse cannot be used accurately if we
    828   // allow references to values across basic blocks when they are not
    829   // alive across basic blocks to begin with.
    830   bool InMBB = true;
    831   if (I) {
    832     Opcode = I->getOpcode();
    833     U = I;
    834     InMBB = I->getParent() == FuncInfo.MBB->getBasicBlock();
    835   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    836     Opcode = C->getOpcode();
    837     U = C;
    838   }
    839 
    840   switch (Opcode) {
    841   default: break;
    842   case Instruction::BitCast:
    843     // Look past bitcasts if the operand is in the same BB.
    844     if (InMBB)
    845       return X86SelectCallAddress(U->getOperand(0), AM);
    846     break;
    847 
    848   case Instruction::IntToPtr:
    849     // Look past no-op inttoptrs if the operand is in the same BB.
    850     if (InMBB &&
    851         TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
    852       return X86SelectCallAddress(U->getOperand(0), AM);
    853     break;
    854 
    855   case Instruction::PtrToInt:
    856     // Look past no-op ptrtoints if the operand is in the same BB.
    857     if (InMBB &&
    858         TLI.getValueType(U->getType()) == TLI.getPointerTy())
    859       return X86SelectCallAddress(U->getOperand(0), AM);
    860     break;
    861   }
    862 
    863   // Handle constant address.
    864   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    865     // Can't handle alternate code models yet.
    866     if (TM.getCodeModel() != CodeModel::Small)
    867       return false;
    868 
    869     // RIP-relative addresses can't have additional register operands.
    870     if (Subtarget->isPICStyleRIPRel() &&
    871         (AM.Base.Reg != 0 || AM.IndexReg != 0))
    872       return false;
    873 
    874     // Can't handle DLL Import.
    875     if (GV->hasDLLImportStorageClass())
    876       return false;
    877 
    878     // Can't handle TLS.
    879     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
    880       if (GVar->isThreadLocal())
    881         return false;
    882 
    883     // Okay, we've committed to selecting this global. Set up the basic address.
    884     AM.GV = GV;
    885 
    886     // No ABI requires an extra load for anything other than DLLImport, which
    887     // we rejected above. Return a direct reference to the global.
    888     if (Subtarget->isPICStyleRIPRel()) {
    889       // Use rip-relative addressing if we can.  Above we verified that the
    890       // base and index registers are unused.
    891       assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    892       AM.Base.Reg = X86::RIP;
    893     } else if (Subtarget->isPICStyleStubPIC()) {
    894       AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    895     } else if (Subtarget->isPICStyleGOT()) {
    896       AM.GVOpFlags = X86II::MO_GOTOFF;
    897     }
    898 
    899     return true;
    900   }
    901 
    902   // If all else fails, try to materialize the value in a register.
    903   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    904     if (AM.Base.Reg == 0) {
    905       AM.Base.Reg = getRegForValue(V);
    906       return AM.Base.Reg != 0;
    907     }
    908     if (AM.IndexReg == 0) {
    909       assert(AM.Scale == 1 && "Scale with no index!");
    910       AM.IndexReg = getRegForValue(V);
    911       return AM.IndexReg != 0;
    912     }
    913   }
    914 
    915   return false;
    916 }
    917 
    918 
    919 /// X86SelectStore - Select and emit code to implement store instructions.
    920 bool X86FastISel::X86SelectStore(const Instruction *I) {
    921   // Atomic stores need special handling.
    922   const StoreInst *S = cast<StoreInst>(I);
    923 
    924   if (S->isAtomic())
    925     return false;
    926 
    927   const Value *Val = S->getValueOperand();
    928   const Value *Ptr = S->getPointerOperand();
    929 
    930   MVT VT;
    931   if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true))
    932     return false;
    933 
    934   unsigned Alignment = S->getAlignment();
    935   unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType());
    936   if (Alignment == 0) // Ensure that codegen never sees alignment 0
    937     Alignment = ABIAlignment;
    938   bool Aligned = Alignment >= ABIAlignment;
    939 
    940   X86AddressMode AM;
    941   if (!X86SelectAddress(Ptr, AM))
    942     return false;
    943 
    944   return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned);
    945 }
    946 
    947 /// X86SelectRet - Select and emit code to implement ret instructions.
    948 bool X86FastISel::X86SelectRet(const Instruction *I) {
    949   const ReturnInst *Ret = cast<ReturnInst>(I);
    950   const Function &F = *I->getParent()->getParent();
    951   const X86MachineFunctionInfo *X86MFInfo =
    952       FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
    953 
    954   if (!FuncInfo.CanLowerReturn)
    955     return false;
    956 
    957   CallingConv::ID CC = F.getCallingConv();
    958   if (CC != CallingConv::C &&
    959       CC != CallingConv::Fast &&
    960       CC != CallingConv::X86_FastCall &&
    961       CC != CallingConv::X86_64_SysV)
    962     return false;
    963 
    964   if (Subtarget->isCallingConvWin64(CC))
    965     return false;
    966 
    967   // Don't handle popping bytes on return for now.
    968   if (X86MFInfo->getBytesToPopOnReturn() != 0)
    969     return false;
    970 
    971   // fastcc with -tailcallopt is intended to provide a guaranteed
    972   // tail call optimization. Fastisel doesn't know how to do that.
    973   if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    974     return false;
    975 
    976   // Let SDISel handle vararg functions.
    977   if (F.isVarArg())
    978     return false;
    979 
    980   // Build a list of return value registers.
    981   SmallVector<unsigned, 4> RetRegs;
    982 
    983   if (Ret->getNumOperands() > 0) {
    984     SmallVector<ISD::OutputArg, 4> Outs;
    985     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
    986 
    987     // Analyze operands of the call, assigning locations to each operand.
    988     SmallVector<CCValAssign, 16> ValLocs;
    989     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, I->getContext());
    990     CCInfo.AnalyzeReturn(Outs, RetCC_X86);
    991 
    992     const Value *RV = Ret->getOperand(0);
    993     unsigned Reg = getRegForValue(RV);
    994     if (Reg == 0)
    995       return false;
    996 
    997     // Only handle a single return value for now.
    998     if (ValLocs.size() != 1)
    999       return false;
   1000 
   1001     CCValAssign &VA = ValLocs[0];
   1002 
   1003     // Don't bother handling odd stuff for now.
   1004     if (VA.getLocInfo() != CCValAssign::Full)
   1005       return false;
   1006     // Only handle register returns for now.
   1007     if (!VA.isRegLoc())
   1008       return false;
   1009 
   1010     // The calling-convention tables for x87 returns don't tell
   1011     // the whole story.
   1012     if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
   1013       return false;
   1014 
   1015     unsigned SrcReg = Reg + VA.getValNo();
   1016     EVT SrcVT = TLI.getValueType(RV->getType());
   1017     EVT DstVT = VA.getValVT();
   1018     // Special handling for extended integers.
   1019     if (SrcVT != DstVT) {
   1020       if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
   1021         return false;
   1022 
   1023       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
   1024         return false;
   1025 
   1026       assert(DstVT == MVT::i32 && "X86 should always ext to i32");
   1027 
   1028       if (SrcVT == MVT::i1) {
   1029         if (Outs[0].Flags.isSExt())
   1030           return false;
   1031         SrcReg = fastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
   1032         SrcVT = MVT::i8;
   1033       }
   1034       unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
   1035                                              ISD::SIGN_EXTEND;
   1036       SrcReg = fastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
   1037                           SrcReg, /*TODO: Kill=*/false);
   1038     }
   1039 
   1040     // Make the copy.
   1041     unsigned DstReg = VA.getLocReg();
   1042     const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
   1043     // Avoid a cross-class copy. This is very unlikely.
   1044     if (!SrcRC->contains(DstReg))
   1045       return false;
   1046     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1047             TII.get(TargetOpcode::COPY), DstReg).addReg(SrcReg);
   1048 
   1049     // Add register to return instruction.
   1050     RetRegs.push_back(VA.getLocReg());
   1051   }
   1052 
   1053   // The x86-64 ABI for returning structs by value requires that we copy
   1054   // the sret argument into %rax for the return. We saved the argument into
   1055   // a virtual register in the entry block, so now we copy the value out
   1056   // and into %rax. We also do the same with %eax for Win32.
   1057   if (F.hasStructRetAttr() &&
   1058       (Subtarget->is64Bit() || Subtarget->isTargetKnownWindowsMSVC())) {
   1059     unsigned Reg = X86MFInfo->getSRetReturnReg();
   1060     assert(Reg &&
   1061            "SRetReturnReg should have been set in LowerFormalArguments()!");
   1062     unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
   1063     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1064             TII.get(TargetOpcode::COPY), RetReg).addReg(Reg);
   1065     RetRegs.push_back(RetReg);
   1066   }
   1067 
   1068   // Now emit the RET.
   1069   MachineInstrBuilder MIB =
   1070     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1071             TII.get(Subtarget->is64Bit() ? X86::RETQ : X86::RETL));
   1072   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
   1073     MIB.addReg(RetRegs[i], RegState::Implicit);
   1074   return true;
   1075 }
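
        // For example (illustrative): for an x86-64 function with an sret argument,
        // the pointer that LowerFormalArguments saved in SRetReturnReg is copied
        // into RAX (EAX on 32-bit MSVC targets) and that register is added to the
        // RET as an implicit operand, alongside any ordinary return-value register.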
   1076 
   1077 /// X86SelectLoad - Select and emit code to implement load instructions.
   1078 ///
   1079 bool X86FastISel::X86SelectLoad(const Instruction *I) {
   1080   const LoadInst *LI = cast<LoadInst>(I);
   1081 
   1082   // Atomic loads need special handling.
   1083   if (LI->isAtomic())
   1084     return false;
   1085 
   1086   MVT VT;
   1087   if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true))
   1088     return false;
   1089 
   1090   const Value *Ptr = LI->getPointerOperand();
   1091 
   1092   X86AddressMode AM;
   1093   if (!X86SelectAddress(Ptr, AM))
   1094     return false;
   1095 
   1096   unsigned Alignment = LI->getAlignment();
   1097   unsigned ABIAlignment = DL.getABITypeAlignment(LI->getType());
   1098   if (Alignment == 0) // Ensure that codegen never sees alignment 0
   1099     Alignment = ABIAlignment;
   1100 
   1101   unsigned ResultReg = 0;
   1102   if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg,
   1103                        Alignment))
   1104     return false;
   1105 
   1106   updateValueMap(I, ResultReg);
   1107   return true;
   1108 }
   1109 
   1110 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
   1111   bool HasAVX = Subtarget->hasAVX();
   1112   bool X86ScalarSSEf32 = Subtarget->hasSSE1();
   1113   bool X86ScalarSSEf64 = Subtarget->hasSSE2();
   1114 
   1115   switch (VT.getSimpleVT().SimpleTy) {
   1116   default:       return 0;
   1117   case MVT::i8:  return X86::CMP8rr;
   1118   case MVT::i16: return X86::CMP16rr;
   1119   case MVT::i32: return X86::CMP32rr;
   1120   case MVT::i64: return X86::CMP64rr;
   1121   case MVT::f32:
   1122     return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
   1123   case MVT::f64:
   1124     return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
   1125   }
   1126 }
   1127 
   1128 /// If the comparison has RHSC as its right-hand side, return an opcode that
   1129 /// folds the immediate into the compare (e.g. CMP32ri); otherwise return 0.
   1130 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
   1131   int64_t Val = RHSC->getSExtValue();
   1132   switch (VT.getSimpleVT().SimpleTy) {
   1133   // Otherwise, we can't fold the immediate into this comparison.
   1134   default:
   1135     return 0;
   1136   case MVT::i8:
   1137     return X86::CMP8ri;
   1138   case MVT::i16:
   1139     if (isInt<8>(Val))
   1140       return X86::CMP16ri8;
   1141     return X86::CMP16ri;
   1142   case MVT::i32:
   1143     if (isInt<8>(Val))
   1144       return X86::CMP32ri8;
   1145     return X86::CMP32ri;
   1146   case MVT::i64:
   1147     if (isInt<8>(Val))
   1148       return X86::CMP64ri8;
   1149     // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
   1150     // field.
   1151     if (isInt<32>(Val))
   1152       return X86::CMP64ri32;
   1153     return 0;
   1154   }
   1155 }
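
        // For example (illustrative): comparing an i32 against 5 selects CMP32ri8
        // (the immediate fits in a sign-extended 8-bit field), comparing it against
        // 100000 selects CMP32ri, and an i64 compare against a constant that does
        // not fit in a signed 32-bit immediate returns 0 and is done register-to-
        // register instead.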
   1156 
   1157 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
   1158                                      EVT VT, DebugLoc CurDbgLoc) {
   1159   unsigned Op0Reg = getRegForValue(Op0);
   1160   if (Op0Reg == 0) return false;
   1161 
   1162   // Handle 'null' like i32/i64 0.
   1163   if (isa<ConstantPointerNull>(Op1))
   1164     Op1 = Constant::getNullValue(DL.getIntPtrType(Op0->getContext()));
   1165 
   1166   // We have two options: compare with register or immediate.  If the RHS of
   1167   // the compare is an immediate that we can fold into this compare, use
   1168   // CMPri, otherwise use CMPrr.
   1169   if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
   1170     if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
   1171       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareImmOpc))
   1172         .addReg(Op0Reg)
   1173         .addImm(Op1C->getSExtValue());
   1174       return true;
   1175     }
   1176   }
   1177 
   1178   unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
   1179   if (CompareOpc == 0) return false;
   1180 
   1181   unsigned Op1Reg = getRegForValue(Op1);
   1182   if (Op1Reg == 0) return false;
   1183   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, CurDbgLoc, TII.get(CompareOpc))
   1184     .addReg(Op0Reg)
   1185     .addReg(Op1Reg);
   1186 
   1187   return true;
   1188 }
   1189 
   1190 bool X86FastISel::X86SelectCmp(const Instruction *I) {
   1191   const CmpInst *CI = cast<CmpInst>(I);
   1192 
   1193   MVT VT;
   1194   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
   1195     return false;
   1196 
   1197   // Try to optimize or fold the cmp.
   1198   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   1199   unsigned ResultReg = 0;
   1200   switch (Predicate) {
   1201   default: break;
   1202   case CmpInst::FCMP_FALSE: {
   1203     ResultReg = createResultReg(&X86::GR32RegClass);
   1204     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0),
   1205             ResultReg);
   1206     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true,
   1207                                            X86::sub_8bit);
   1208     if (!ResultReg)
   1209       return false;
   1210     break;
   1211   }
   1212   case CmpInst::FCMP_TRUE: {
   1213     ResultReg = createResultReg(&X86::GR8RegClass);
   1214     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
   1215             ResultReg).addImm(1);
   1216     break;
   1217   }
   1218   }
   1219 
   1220   if (ResultReg) {
   1221     updateValueMap(I, ResultReg);
   1222     return true;
   1223   }
   1224 
   1225   const Value *LHS = CI->getOperand(0);
   1226   const Value *RHS = CI->getOperand(1);
   1227 
   1228   // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
   1229   // We don't have to materialize a zero constant for this case and can just use
   1230   // %x again on the RHS.
   1231   if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
   1232     const auto *RHSC = dyn_cast<ConstantFP>(RHS);
   1233     if (RHSC && RHSC->isNullValue())
   1234       RHS = LHS;
   1235   }
   1236 
   1237   // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
   1238   static unsigned SETFOpcTable[2][3] = {
   1239     { X86::SETEr,  X86::SETNPr, X86::AND8rr },
   1240     { X86::SETNEr, X86::SETPr,  X86::OR8rr  }
   1241   };
   1242   unsigned *SETFOpc = nullptr;
   1243   switch (Predicate) {
   1244   default: break;
   1245   case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
   1246   case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break;
   1247   }
   1248 
   1249   ResultReg = createResultReg(&X86::GR8RegClass);
   1250   if (SETFOpc) {
   1251     if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
   1252       return false;
   1253 
   1254     unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
   1255     unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
   1256     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
   1257             FlagReg1);
   1258     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
   1259             FlagReg2);
   1260     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]),
   1261             ResultReg).addReg(FlagReg1).addReg(FlagReg2);
   1262     updateValueMap(I, ResultReg);
   1263     return true;
   1264   }
   1265 
   1266   X86::CondCode CC;
   1267   bool SwapArgs;
   1268   std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
   1269   assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
   1270   unsigned Opc = X86::getSETFromCond(CC);
   1271 
   1272   if (SwapArgs)
   1273     std::swap(LHS, RHS);
   1274 
   1275   // Emit a compare of LHS/RHS.
   1276   if (!X86FastEmitCompare(LHS, RHS, VT, I->getDebugLoc()))
   1277     return false;
   1278 
   1279   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
   1280   updateValueMap(I, ResultReg);
   1281   return true;
   1282 }
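
        // For example (illustrative): "fcmp oeq double %a, %b" cannot be decided
        // from a single flag, so the code above emits ucomisd followed by SETE and
        // SETNP and ANDs the two result bytes; "fcmp une" instead uses SETNE/SETP
        // combined with OR8rr, per SETFOpcTable.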
   1283 
   1284 bool X86FastISel::X86SelectZExt(const Instruction *I) {
   1285   EVT DstVT = TLI.getValueType(I->getType());
   1286   if (!TLI.isTypeLegal(DstVT))
   1287     return false;
   1288 
   1289   unsigned ResultReg = getRegForValue(I->getOperand(0));
   1290   if (ResultReg == 0)
   1291     return false;
   1292 
   1293   // Handle zero-extension from i1 to i8, which is common.
   1294   MVT SrcVT = TLI.getSimpleValueType(I->getOperand(0)->getType());
   1295   if (SrcVT.SimpleTy == MVT::i1) {
   1296     // Set the high bits to zero.
   1297     ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
   1298     SrcVT = MVT::i8;
   1299 
   1300     if (ResultReg == 0)
   1301       return false;
   1302   }
   1303 
   1304   if (DstVT == MVT::i64) {
   1305     // Handle extension to 64-bits via sub-register shenanigans.
   1306     unsigned MovInst;
   1307 
   1308     switch (SrcVT.SimpleTy) {
   1309     case MVT::i8:  MovInst = X86::MOVZX32rr8;  break;
   1310     case MVT::i16: MovInst = X86::MOVZX32rr16; break;
   1311     case MVT::i32: MovInst = X86::MOV32rr;     break;
   1312     default: llvm_unreachable("Unexpected zext to i64 source type");
   1313     }
   1314 
   1315     unsigned Result32 = createResultReg(&X86::GR32RegClass);
   1316     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(MovInst), Result32)
   1317       .addReg(ResultReg);
   1318 
   1319     ResultReg = createResultReg(&X86::GR64RegClass);
   1320     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::SUBREG_TO_REG),
   1321             ResultReg)
   1322       .addImm(0).addReg(Result32).addImm(X86::sub_32bit);
   1323   } else if (DstVT != MVT::i8) {
   1324     ResultReg = fastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
   1325                            ResultReg, /*Kill=*/true);
   1326     if (ResultReg == 0)
   1327       return false;
   1328   }
   1329 
   1330   updateValueMap(I, ResultReg);
   1331   return true;
   1332 }
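
        // For example (illustrative): "zext i1 %b to i64" first clears all but the
        // low bit of the i8 register, widens it to 32 bits with MOVZX32rr8, and
        // then uses SUBREG_TO_REG to place the result in a 64-bit register, relying
        // on the fact that a 32-bit move implicitly zeroes the upper 32 bits.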
   1333 
   1334 bool X86FastISel::X86SelectBranch(const Instruction *I) {
   1335   // Unconditional branches are selected by tablegen-generated code.
   1336   // Handle a conditional branch.
   1337   const BranchInst *BI = cast<BranchInst>(I);
   1338   MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   1339   MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
   1340 
   1341   // Fold the common case of a conditional branch with a comparison
   1342   // in the same block (values defined on other blocks may not have
   1343   // initialized registers).
   1344   X86::CondCode CC;
   1345   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
   1346     if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
   1347       EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
   1348 
   1349       // Try to optimize or fold the cmp.
   1350       CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   1351       switch (Predicate) {
   1352       default: break;
   1353       case CmpInst::FCMP_FALSE: fastEmitBranch(FalseMBB, DbgLoc); return true;
   1354       case CmpInst::FCMP_TRUE:  fastEmitBranch(TrueMBB, DbgLoc); return true;
   1355       }
   1356 
   1357       const Value *CmpLHS = CI->getOperand(0);
   1358       const Value *CmpRHS = CI->getOperand(1);
   1359 
   1360       // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x,
   1361       // 0.0.
   1362       // We don't have to materialize a zero constant for this case and can just
   1363       // use %x again on the RHS.
   1364       if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
   1365         const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
   1366         if (CmpRHSC && CmpRHSC->isNullValue())
   1367           CmpRHS = CmpLHS;
   1368       }
   1369 
   1370       // Try to take advantage of fallthrough opportunities.
   1371       if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
   1372         std::swap(TrueMBB, FalseMBB);
   1373         Predicate = CmpInst::getInversePredicate(Predicate);
   1374       }
   1375 
   1376       // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition
   1377       // code check. Instead two branch instructions are required to check all
   1378       // the flags. First we change the predicate to a supported condition code,
   1379       // which will be the first branch. Later on we will emit the second
   1380       // branch.
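              // For example (illustrative), a UNE compare of two floats becomes
              // roughly:
              //   ucomiss %xmm1, %xmm0
              //   jne  TrueMBB       ; ZF == 0 (not equal)
              //   jp   TrueMBB       ; PF == 1 (unordered)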
   1381       bool NeedExtraBranch = false;
   1382       switch (Predicate) {
   1383       default: break;
   1384       case CmpInst::FCMP_OEQ:
   1385         std::swap(TrueMBB, FalseMBB); // fall-through
   1386       case CmpInst::FCMP_UNE:
   1387         NeedExtraBranch = true;
   1388         Predicate = CmpInst::FCMP_ONE;
   1389         break;
   1390       }
   1391 
   1392       bool SwapArgs;
   1393       unsigned BranchOpc;
   1394       std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate);
   1395       assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
   1396 
   1397       BranchOpc = X86::GetCondBranchFromCond(CC);
   1398       if (SwapArgs)
   1399         std::swap(CmpLHS, CmpRHS);
   1400 
   1401       // Emit a compare of the LHS and RHS, setting the flags.
   1402       if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT, CI->getDebugLoc()))
   1403         return false;
   1404 
   1405       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
   1406         .addMBB(TrueMBB);
   1407 
   1408       // X86 requires a second branch to handle UNE (and OEQ, which is mapped
   1409       // to UNE above).
   1410       if (NeedExtraBranch) {
   1411         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_1))
   1412           .addMBB(TrueMBB);
   1413       }
   1414 
   1415       // Obtain the branch weight and add the TrueBB to the successor list.
   1416       uint32_t BranchWeight = 0;
   1417       if (FuncInfo.BPI)
   1418         BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
   1419                                                    TrueMBB->getBasicBlock());
   1420       FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
   1421 
   1422       // Emits an unconditional branch to the FalseBB, obtains the branch
   1423       // weight, and adds it to the successor list.
   1424       fastEmitBranch(FalseMBB, DbgLoc);
   1425 
   1426       return true;
   1427     }
   1428   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
   1429     // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
   1430     // typically happen for _Bool and C++ bools.
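            // The emitted pattern is roughly:  test $1, %reg ; jne TrueMBB
            // (or 'je' with the successors swapped when TrueMBB is the layout
            // successor, so the conditional branch can fall through).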
   1431     MVT SourceVT;
   1432     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
   1433         isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
   1434       unsigned TestOpc = 0;
   1435       switch (SourceVT.SimpleTy) {
   1436       default: break;
   1437       case MVT::i8:  TestOpc = X86::TEST8ri; break;
   1438       case MVT::i16: TestOpc = X86::TEST16ri; break;
   1439       case MVT::i32: TestOpc = X86::TEST32ri; break;
   1440       case MVT::i64: TestOpc = X86::TEST64ri32; break;
   1441       }
   1442       if (TestOpc) {
   1443         unsigned OpReg = getRegForValue(TI->getOperand(0));
   1444         if (OpReg == 0) return false;
   1445         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
   1446           .addReg(OpReg).addImm(1);
   1447 
   1448         unsigned JmpOpc = X86::JNE_1;
   1449         if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
   1450           std::swap(TrueMBB, FalseMBB);
   1451           JmpOpc = X86::JE_1;
   1452         }
   1453 
   1454         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc))
   1455           .addMBB(TrueMBB);
   1456         fastEmitBranch(FalseMBB, DbgLoc);
   1457         uint32_t BranchWeight = 0;
   1458         if (FuncInfo.BPI)
   1459           BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
   1460                                                      TrueMBB->getBasicBlock());
   1461         FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
   1462         return true;
   1463       }
   1464     }
   1465   } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) {
   1466     // Request the condition register even though it isn't used directly;
   1467     // otherwise the intrinsic might be optimized away completely.
   1468     unsigned TmpReg = getRegForValue(BI->getCondition());
   1469     if (TmpReg == 0)
   1470       return false;
   1471 
   1472     unsigned BranchOpc = X86::GetCondBranchFromCond(CC);
   1473 
   1474     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc))
   1475       .addMBB(TrueMBB);
   1476     fastEmitBranch(FalseMBB, DbgLoc);
   1477     uint32_t BranchWeight = 0;
   1478     if (FuncInfo.BPI)
   1479       BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
   1480                                                  TrueMBB->getBasicBlock());
   1481     FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
   1482     return true;
   1483   }
   1484 
   1485   // Otherwise do a clumsy setcc and re-test it.
   1486   // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
   1487   // in an explicit cast, so make sure to handle that correctly.
   1488   unsigned OpReg = getRegForValue(BI->getCondition());
   1489   if (OpReg == 0) return false;
   1490 
   1491   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
   1492     .addReg(OpReg).addImm(1);
   1493   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
   1494     .addMBB(TrueMBB);
   1495   fastEmitBranch(FalseMBB, DbgLoc);
   1496   uint32_t BranchWeight = 0;
   1497   if (FuncInfo.BPI)
   1498     BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(),
   1499                                                TrueMBB->getBasicBlock());
   1500   FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight);
   1501   return true;
   1502 }
   1503 
   1504 bool X86FastISel::X86SelectShift(const Instruction *I) {
   1505   unsigned CReg = 0, OpReg = 0;
   1506   const TargetRegisterClass *RC = nullptr;
   1507   if (I->getType()->isIntegerTy(8)) {
   1508     CReg = X86::CL;
   1509     RC = &X86::GR8RegClass;
   1510     switch (I->getOpcode()) {
   1511     case Instruction::LShr: OpReg = X86::SHR8rCL; break;
   1512     case Instruction::AShr: OpReg = X86::SAR8rCL; break;
   1513     case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
   1514     default: return false;
   1515     }
   1516   } else if (I->getType()->isIntegerTy(16)) {
   1517     CReg = X86::CX;
   1518     RC = &X86::GR16RegClass;
   1519     switch (I->getOpcode()) {
   1520     case Instruction::LShr: OpReg = X86::SHR16rCL; break;
   1521     case Instruction::AShr: OpReg = X86::SAR16rCL; break;
   1522     case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
   1523     default: return false;
   1524     }
   1525   } else if (I->getType()->isIntegerTy(32)) {
   1526     CReg = X86::ECX;
   1527     RC = &X86::GR32RegClass;
   1528     switch (I->getOpcode()) {
   1529     case Instruction::LShr: OpReg = X86::SHR32rCL; break;
   1530     case Instruction::AShr: OpReg = X86::SAR32rCL; break;
   1531     case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
   1532     default: return false;
   1533     }
   1534   } else if (I->getType()->isIntegerTy(64)) {
   1535     CReg = X86::RCX;
   1536     RC = &X86::GR64RegClass;
   1537     switch (I->getOpcode()) {
   1538     case Instruction::LShr: OpReg = X86::SHR64rCL; break;
   1539     case Instruction::AShr: OpReg = X86::SAR64rCL; break;
   1540     case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
   1541     default: return false;
   1542     }
   1543   } else {
   1544     return false;
   1545   }
   1546 
   1547   MVT VT;
   1548   if (!isTypeLegal(I->getType(), VT))
   1549     return false;
   1550 
   1551   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   1552   if (Op0Reg == 0) return false;
   1553 
   1554   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   1555   if (Op1Reg == 0) return false;
   1556   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpcode::COPY),
   1557           CReg).addReg(Op1Reg);
   1558 
   1559   // The shift instruction uses X86::CL. If we defined a super-register
   1560   // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
   1561   if (CReg != X86::CL)
   1562     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1563             TII.get(TargetOpcode::KILL), X86::CL)
   1564       .addReg(CReg, RegState::Kill);
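          // For example (illustrative), a 32-bit 'shl' lowers to a copy of the
          // shift amount into ECX, a KILL that marks only CL as live, and then
          // 'shll %cl, %val'.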
   1565 
   1566   unsigned ResultReg = createResultReg(RC);
   1567   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpReg), ResultReg)
   1568     .addReg(Op0Reg);
   1569   updateValueMap(I, ResultReg);
   1570   return true;
   1571 }
   1572 
   1573 bool X86FastISel::X86SelectDivRem(const Instruction *I) {
   1574   const static unsigned NumTypes = 4; // i8, i16, i32, i64
   1575   const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
   1576   const static bool S = true;  // IsSigned
   1577   const static bool U = false; // !IsSigned
   1578   const static unsigned Copy = TargetOpcode::COPY;
   1579   // For the X86 DIV/IDIV instruction, in most cases the dividend
   1580   // (numerator) must be in a specific register pair highreg:lowreg,
   1581   // producing the quotient in lowreg and the remainder in highreg.
   1582   // For most data types, to set up the instruction, the dividend is
   1583   // copied into lowreg, and lowreg is sign-extended or zero-extended
   1584   // into highreg.  The exception is i8, where the dividend is defined
   1585   // as a single register rather than a register pair, and we
   1586   // therefore directly sign-extend or zero-extend the dividend into
   1587   // lowreg, instead of copying, and ignore the highreg.
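          // For example (illustrative), a 32-bit 'sdiv' is emitted roughly as:
          //   movl %lhs, %eax ; cdq ; idivl %rhs   -> quotient in %eax
          // and a 32-bit 'srem' uses the same sequence with the result in %edx.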
   1588   const static struct DivRemEntry {
   1589     // The following portion depends only on the data type.
   1590     const TargetRegisterClass *RC;
   1591     unsigned LowInReg;  // low part of the register pair
   1592     unsigned HighInReg; // high part of the register pair
   1593     // The following portion depends on both the data type and the operation.
   1594     struct DivRemResult {
   1595       unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
   1596       unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
   1597                                 // highreg, or copying a zero into highreg.
   1598       unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
   1599                                 // zero/sign-extending into lowreg for i8.
   1600       unsigned DivRemResultReg; // Register containing the desired result.
   1601       bool IsOpSigned;          // Whether to use signed or unsigned form.
   1602     } ResultTable[NumOps];
   1603   } OpTable[NumTypes] = {
   1604     { &X86::GR8RegClass,  X86::AX,  0, {
   1605         { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
   1606         { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
   1607         { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
   1608         { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
   1609       }
   1610     }, // i8
   1611     { &X86::GR16RegClass, X86::AX,  X86::DX, {
   1612         { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
   1613         { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
   1614         { X86::DIV16r,  X86::MOV32r0, Copy,            X86::AX,  U }, // UDiv
   1615         { X86::DIV16r,  X86::MOV32r0, Copy,            X86::DX,  U }, // URem
   1616       }
   1617     }, // i16
   1618     { &X86::GR32RegClass, X86::EAX, X86::EDX, {
   1619         { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
   1620         { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
   1621         { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
   1622         { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
   1623       }
   1624     }, // i32
   1625     { &X86::GR64RegClass, X86::RAX, X86::RDX, {
   1626         { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
   1627         { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
   1628         { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RAX, U }, // UDiv
   1629         { X86::DIV64r,  X86::MOV32r0, Copy,            X86::RDX, U }, // URem
   1630       }
   1631     }, // i64
   1632   };
   1633 
   1634   MVT VT;
   1635   if (!isTypeLegal(I->getType(), VT))
   1636     return false;
   1637 
   1638   unsigned TypeIndex, OpIndex;
   1639   switch (VT.SimpleTy) {
   1640   default: return false;
   1641   case MVT::i8:  TypeIndex = 0; break;
   1642   case MVT::i16: TypeIndex = 1; break;
   1643   case MVT::i32: TypeIndex = 2; break;
   1644   case MVT::i64: TypeIndex = 3;
   1645     if (!Subtarget->is64Bit())
   1646       return false;
   1647     break;
   1648   }
   1649 
   1650   switch (I->getOpcode()) {
   1651   default: llvm_unreachable("Unexpected div/rem opcode");
   1652   case Instruction::SDiv: OpIndex = 0; break;
   1653   case Instruction::SRem: OpIndex = 1; break;
   1654   case Instruction::UDiv: OpIndex = 2; break;
   1655   case Instruction::URem: OpIndex = 3; break;
   1656   }
   1657 
   1658   const DivRemEntry &TypeEntry = OpTable[TypeIndex];
   1659   const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
   1660   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   1661   if (Op0Reg == 0)
   1662     return false;
   1663   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   1664   if (Op1Reg == 0)
   1665     return false;
   1666 
   1667   // Move op0 into low-order input register.
   1668   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1669           TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
   1670   // Zero-extend or sign-extend into high-order input register.
   1671   if (OpEntry.OpSignExtend) {
   1672     if (OpEntry.IsOpSigned)
   1673       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1674               TII.get(OpEntry.OpSignExtend));
   1675     else {
   1676       unsigned Zero32 = createResultReg(&X86::GR32RegClass);
   1677       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1678               TII.get(X86::MOV32r0), Zero32);
   1679 
   1680       // Copy the zero into the appropriate sub/super/identical physical
   1681       // register. Unfortunately the operations needed are not uniform enough
   1682       // to fit neatly into the table above.
   1683       if (VT.SimpleTy == MVT::i16) {
   1684         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1685                 TII.get(Copy), TypeEntry.HighInReg)
   1686           .addReg(Zero32, 0, X86::sub_16bit);
   1687       } else if (VT.SimpleTy == MVT::i32) {
   1688         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1689                 TII.get(Copy), TypeEntry.HighInReg)
   1690             .addReg(Zero32);
   1691       } else if (VT.SimpleTy == MVT::i64) {
   1692         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1693                 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
   1694             .addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
   1695       }
   1696     }
   1697   }
   1698   // Generate the DIV/IDIV instruction.
   1699   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1700           TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
   1701   // For i8 remainder, we can't reference AH directly, as we'll end
   1702   // up with bogus copies like %R9B = COPY %AH. Reference AX
   1703   // instead to prevent AH references in a REX instruction.
   1704   //
   1705   // The current assumption of the fast register allocator is that isel
   1706   // won't generate explicit references to the GPR8_NOREX registers. If
   1707   // the allocator and/or the backend get enhanced to be more robust in
   1708   // that regard, this can be, and should be, removed.
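          // The workaround emitted below is, roughly (illustrative):
          //   copy %ax into a GR16 vreg ; shr $8 ; use its low 8-bit subreg.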
   1709   unsigned ResultReg = 0;
   1710   if ((I->getOpcode() == Instruction::SRem ||
   1711        I->getOpcode() == Instruction::URem) &&
   1712       OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
   1713     unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
   1714     unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
   1715     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   1716             TII.get(Copy), SourceSuperReg).addReg(X86::AX);
   1717 
   1718     // Shift AX right by 8 bits instead of using AH.
   1719     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SHR16ri),
   1720             ResultSuperReg).addReg(SourceSuperReg).addImm(8);
   1721 
   1722     // Now reference the 8-bit subreg of the result.
   1723     ResultReg = fastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
   1724                                            /*Kill=*/true, X86::sub_8bit);
   1725   }
   1726   // Copy the result out of the physreg if we haven't already.
   1727   if (!ResultReg) {
   1728     ResultReg = createResultReg(TypeEntry.RC);
   1729     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Copy), ResultReg)
   1730         .addReg(OpEntry.DivRemResultReg);
   1731   }
   1732   updateValueMap(I, ResultReg);
   1733 
   1734   return true;
   1735 }
   1736 
   1737 /// \brief Emit a conditional move instruction (if they are supported) to lower
   1738 /// the select.
   1739 bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
   1740   // Check if the subtarget supports these instructions.
   1741   if (!Subtarget->hasCMov())
   1742     return false;
   1743 
   1744   // FIXME: Add support for i8.
   1745   if (RetVT < MVT::i16 || RetVT > MVT::i64)
   1746     return false;
   1747 
   1748   const Value *Cond = I->getOperand(0);
   1749   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
   1750   bool NeedTest = true;
   1751   X86::CondCode CC = X86::COND_NE;
   1752 
   1753   // Optimize conditions coming from a compare if both instructions are in the
   1754   // same basic block (values defined in other basic blocks may not have
   1755   // initialized registers).
   1756   const auto *CI = dyn_cast<CmpInst>(Cond);
   1757   if (CI && (CI->getParent() == I->getParent())) {
   1758     CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   1759 
   1760     // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
   1761     static unsigned SETFOpcTable[2][3] = {
   1762       { X86::SETNPr, X86::SETEr , X86::TEST8rr },
   1763       { X86::SETPr,  X86::SETNEr, X86::OR8rr   }
   1764     };
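            // e.g. (illustrative) FCMP_OEQ is checked as: ucomiss ; setnp ; sete ;
            // test the two flag bytes -- ZF is then clear exactly when the operands
            // are ordered and equal, so the CMOV below keys off COND_NE.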
   1765     unsigned *SETFOpc = nullptr;
   1766     switch (Predicate) {
   1767     default: break;
   1768     case CmpInst::FCMP_OEQ:
   1769       SETFOpc = &SETFOpcTable[0][0];
   1770       Predicate = CmpInst::ICMP_NE;
   1771       break;
   1772     case CmpInst::FCMP_UNE:
   1773       SETFOpc = &SETFOpcTable[1][0];
   1774       Predicate = CmpInst::ICMP_NE;
   1775       break;
   1776     }
   1777 
   1778     bool NeedSwap;
   1779     std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate);
   1780     assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");
   1781 
   1782     const Value *CmpLHS = CI->getOperand(0);
   1783     const Value *CmpRHS = CI->getOperand(1);
   1784     if (NeedSwap)
   1785       std::swap(CmpLHS, CmpRHS);
   1786 
   1787     EVT CmpVT = TLI.getValueType(CmpLHS->getType());
   1788     // Emit a compare of the LHS and RHS, setting the flags.
   1789     if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
   1790       return false;
   1791 
   1792     if (SETFOpc) {
   1793       unsigned FlagReg1 = createResultReg(&X86::GR8RegClass);
   1794       unsigned FlagReg2 = createResultReg(&X86::GR8RegClass);
   1795       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]),
   1796               FlagReg1);
   1797       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]),
   1798               FlagReg2);
   1799       auto const &II = TII.get(SETFOpc[2]);
   1800       if (II.getNumDefs()) {
   1801         unsigned TmpReg = createResultReg(&X86::GR8RegClass);
   1802         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg)
   1803           .addReg(FlagReg2).addReg(FlagReg1);
   1804       } else {
   1805         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
   1806           .addReg(FlagReg2).addReg(FlagReg1);
   1807       }
   1808     }
   1809     NeedTest = false;
   1810   } else if (foldX86XALUIntrinsic(CC, I, Cond)) {
   1811     // Request the condition register even though it isn't used directly;
   1812     // otherwise the intrinsic might be optimized away completely.
   1813     unsigned TmpReg = getRegForValue(Cond);
   1814     if (TmpReg == 0)
   1815       return false;
   1816 
   1817     NeedTest = false;
   1818   }
   1819 
   1820   if (NeedTest) {
   1821     // Selects operate on i1; however, CondReg is 8 bits wide and may contain
   1822     // garbage. Only the least significant bit is guaranteed to be accurate,
   1823     // so reading more than the lsb may yield non-zero values even when the
   1824     // lsb is zero. Therefore, truncate CondReg to i1 for the select by
   1825     // performing a TEST against 1.
   1826     unsigned CondReg = getRegForValue(Cond);
   1827     if (CondReg == 0)
   1828       return false;
   1829     bool CondIsKill = hasTrivialKill(Cond);
   1830 
   1831     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
   1832       .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
   1833   }
   1834 
   1835   const Value *LHS = I->getOperand(1);
   1836   const Value *RHS = I->getOperand(2);
   1837 
   1838   unsigned RHSReg = getRegForValue(RHS);
   1839   bool RHSIsKill = hasTrivialKill(RHS);
   1840 
   1841   unsigned LHSReg = getRegForValue(LHS);
   1842   bool LHSIsKill = hasTrivialKill(LHS);
   1843 
   1844   if (!LHSReg || !RHSReg)
   1845     return false;
   1846 
   1847   unsigned Opc = X86::getCMovFromCond(CC, RC->getSize());
   1848   unsigned ResultReg = fastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill,
   1849                                        LHSReg, LHSIsKill);
   1850   updateValueMap(I, ResultReg);
   1851   return true;
   1852 }
   1853 
   1854 /// \brief Emit SSE or AVX instructions to lower the select.
   1855 ///
   1856 /// Try to use SSE1/SSE2 instructions to simulate a select without branches.
   1857 /// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary
   1858 /// SSE instructions are available. If AVX is available, try to use a VBLENDV.
   1859 bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
   1860   // Optimize conditions coming from a compare if both instructions are in the
   1861   // same basic block (values defined in other basic blocks may not have
   1862   // initialized registers).
   1863   const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0));
   1864   if (!CI || (CI->getParent() != I->getParent()))
   1865     return false;
   1866 
   1867   if (I->getType() != CI->getOperand(0)->getType() ||
   1868       !((Subtarget->hasSSE1() && RetVT == MVT::f32) ||
   1869         (Subtarget->hasSSE2() && RetVT == MVT::f64)))
   1870     return false;
   1871 
   1872   const Value *CmpLHS = CI->getOperand(0);
   1873   const Value *CmpRHS = CI->getOperand(1);
   1874   CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   1875 
   1876   // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0.
   1877   // We don't have to materialize a zero constant for this case and can just use
   1878   // %x again on the RHS.
   1879   if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) {
   1880     const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS);
   1881     if (CmpRHSC && CmpRHSC->isNullValue())
   1882       CmpRHS = CmpLHS;
   1883   }
   1884 
   1885   unsigned CC;
   1886   bool NeedSwap;
   1887   std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate);
   1888   if (CC > 7)
   1889     return false;
   1890 
   1891   if (NeedSwap)
   1892     std::swap(CmpLHS, CmpRHS);
   1893 
   1894   // Choose the SSE instruction sequence based on data type (float or double).
   1895   static unsigned OpcTable[2][4] = {
   1896     { X86::CMPSSrr,  X86::FsANDPSrr,  X86::FsANDNPSrr,  X86::FsORPSrr  },
   1897     { X86::CMPSDrr,  X86::FsANDPDrr,  X86::FsANDNPDrr,  X86::FsORPDrr  }
   1898   };
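          // Without AVX the select is computed as a bitwise blend (illustrative):
          //   mask   = cmpss cc, %cmplhs, %cmprhs   ; all-ones or all-zeros
          //   result = (mask & lhs) | (~mask & rhs)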
   1899 
   1900   unsigned *Opc = nullptr;
   1901   switch (RetVT.SimpleTy) {
   1902   default: return false;
   1903   case MVT::f32: Opc = &OpcTable[0][0]; break;
   1904   case MVT::f64: Opc = &OpcTable[1][0]; break;
   1905   }
   1906 
   1907   const Value *LHS = I->getOperand(1);
   1908   const Value *RHS = I->getOperand(2);
   1909 
   1910   unsigned LHSReg = getRegForValue(LHS);
   1911   bool LHSIsKill = hasTrivialKill(LHS);
   1912 
   1913   unsigned RHSReg = getRegForValue(RHS);
   1914   bool RHSIsKill = hasTrivialKill(RHS);
   1915 
   1916   unsigned CmpLHSReg = getRegForValue(CmpLHS);
   1917   bool CmpLHSIsKill = hasTrivialKill(CmpLHS);
   1918 
   1919   unsigned CmpRHSReg = getRegForValue(CmpRHS);
   1920   bool CmpRHSIsKill = hasTrivialKill(CmpRHS);
   1921 
   1922   if (!LHSReg || !RHSReg || !CmpLHSReg || !CmpRHSReg)
   1923     return false;
   1924 
   1925   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
   1926   unsigned ResultReg;
   1927 
   1928   if (Subtarget->hasAVX()) {
   1929     // If we have AVX, create 1 blendv instead of 3 logic instructions.
   1930     // Blendv was introduced with SSE 4.1, but the 2 register form implicitly
   1931     // uses XMM0 as the selection register. That may need just as many
   1932     // instructions as the AND/ANDN/OR sequence due to register moves, so
   1933     // don't bother.
   1934     unsigned CmpOpcode =
   1935       (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
   1936     unsigned BlendOpcode =
   1937       (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
   1938 
   1939     unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
   1940                                        CmpRHSReg, CmpRHSIsKill, CC);
   1941     ResultReg = fastEmitInst_rrr(BlendOpcode, RC, RHSReg, RHSIsKill,
   1942                                  LHSReg, LHSIsKill, CmpReg, true);
   1943   } else {
   1944     unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
   1945                                        CmpRHSReg, CmpRHSIsKill, CC);
   1946     unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
   1947                                       LHSReg, LHSIsKill);
   1948     unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
   1949                                        RHSReg, RHSIsKill);
   1950     ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
   1951                                          AndReg, /*IsKill=*/true);
   1952   }
   1953   updateValueMap(I, ResultReg);
   1954   return true;
   1955 }
   1956 
   1957 bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
   1958   // These are pseudo CMOV instructions and will be later expanded into control-
   1959   // flow.
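          // (These pseudos are later expanded by the target's custom inserter --
          // EmitLoweredSelect in X86ISelLowering -- into a branch diamond plus a
          // PHI, so no hardware cmov support is required.)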
   1960   unsigned Opc;
   1961   switch (RetVT.SimpleTy) {
   1962   default: return false;
   1963   case MVT::i8:  Opc = X86::CMOV_GR8;  break;
   1964   case MVT::i16: Opc = X86::CMOV_GR16; break;
   1965   case MVT::i32: Opc = X86::CMOV_GR32; break;
   1966   case MVT::f32: Opc = X86::CMOV_FR32; break;
   1967   case MVT::f64: Opc = X86::CMOV_FR64; break;
   1968   }
   1969 
   1970   const Value *Cond = I->getOperand(0);
   1971   X86::CondCode CC = X86::COND_NE;
   1972 
   1973   // Optimize conditions coming from a compare if both instructions are in the
   1974   // same basic block (values defined in other basic blocks may not have
   1975   // initialized registers).
   1976   const auto *CI = dyn_cast<CmpInst>(Cond);
   1977   if (CI && (CI->getParent() == I->getParent())) {
   1978     bool NeedSwap;
   1979     std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate());
   1980     if (CC > X86::LAST_VALID_COND)
   1981       return false;
   1982 
   1983     const Value *CmpLHS = CI->getOperand(0);
   1984     const Value *CmpRHS = CI->getOperand(1);
   1985 
   1986     if (NeedSwap)
   1987       std::swap(CmpLHS, CmpRHS);
   1988 
   1989     EVT CmpVT = TLI.getValueType(CmpLHS->getType());
   1990     if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT, CI->getDebugLoc()))
   1991       return false;
   1992   } else {
   1993     unsigned CondReg = getRegForValue(Cond);
   1994     if (CondReg == 0)
   1995       return false;
   1996     bool CondIsKill = hasTrivialKill(Cond);
   1997     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
   1998       .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
   1999   }
   2000 
   2001   const Value *LHS = I->getOperand(1);
   2002   const Value *RHS = I->getOperand(2);
   2003 
   2004   unsigned LHSReg = getRegForValue(LHS);
   2005   bool LHSIsKill = hasTrivialKill(LHS);
   2006 
   2007   unsigned RHSReg = getRegForValue(RHS);
   2008   bool RHSIsKill = hasTrivialKill(RHS);
   2009 
   2010   if (!LHSReg || !RHSReg)
   2011     return false;
   2012 
   2013   const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
   2014 
   2015   unsigned ResultReg =
   2016     fastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC);
   2017   updateValueMap(I, ResultReg);
   2018   return true;
   2019 }
   2020 
   2021 bool X86FastISel::X86SelectSelect(const Instruction *I) {
   2022   MVT RetVT;
   2023   if (!isTypeLegal(I->getType(), RetVT))
   2024     return false;
   2025 
   2026   // Check if we can fold the select.
   2027   if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) {
   2028     CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
   2029     const Value *Opnd = nullptr;
   2030     switch (Predicate) {
   2031     default:                              break;
   2032     case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break;
   2033     case CmpInst::FCMP_TRUE:  Opnd = I->getOperand(1); break;
   2034     }
   2035     // No need for a select anymore - this is an unconditional move.
   2036     if (Opnd) {
   2037       unsigned OpReg = getRegForValue(Opnd);
   2038       if (OpReg == 0)
   2039         return false;
   2040       bool OpIsKill = hasTrivialKill(Opnd);
   2041       const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
   2042       unsigned ResultReg = createResultReg(RC);
   2043       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2044               TII.get(TargetOpcode::COPY), ResultReg)
   2045         .addReg(OpReg, getKillRegState(OpIsKill));
   2046       updateValueMap(I, ResultReg);
   2047       return true;
   2048     }
   2049   }
   2050 
   2051   // First try to use real conditional move instructions.
   2052   if (X86FastEmitCMoveSelect(RetVT, I))
   2053     return true;
   2054 
   2055   // Try to use a sequence of SSE instructions to simulate a conditional move.
   2056   if (X86FastEmitSSESelect(RetVT, I))
   2057     return true;
   2058 
   2059   // Fall-back to pseudo conditional move instructions, which will be later
   2060   // converted to control-flow.
   2061   if (X86FastEmitPseudoSelect(RetVT, I))
   2062     return true;
   2063 
   2064   return false;
   2065 }
   2066 
   2067 bool X86FastISel::X86SelectSIToFP(const Instruction *I) {
   2068   if (!I->getOperand(0)->getType()->isIntegerTy(32))
   2069     return false;
   2070 
   2071   // Select integer to float/double conversion.
   2072   unsigned OpReg = getRegForValue(I->getOperand(0));
   2073   if (OpReg == 0)
   2074     return false;
   2075 
   2076   const TargetRegisterClass *RC = nullptr;
   2077   unsigned Opcode;
   2078 
   2079   if (I->getType()->isDoubleTy()) {
   2080     // sitofp int -> double
   2081     Opcode = X86::VCVTSI2SDrr;
   2082     RC = &X86::FR64RegClass;
   2083   } else if (I->getType()->isFloatTy()) {
   2084     // sitofp int -> float
   2085     Opcode = X86::VCVTSI2SSrr;
   2086     RC = &X86::FR32RegClass;
   2087   } else
   2088     return false;
   2089 
   2090   // The target-independent selection algorithm in FastISel already knows how
   2091   // to select a SINT_TO_FP if the target is SSE but not AVX. This code is only
   2092   // reachable if the subtarget has AVX.
   2093   assert(Subtarget->hasAVX() && "Expected a subtarget with AVX!");
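          // The AVX form of the conversion takes an extra source register that
          // supplies the upper elements of the destination; we only need the
          // scalar result, so feed it an IMPLICIT_DEF.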
   2094 
   2095   unsigned ImplicitDefReg = createResultReg(RC);
   2096   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2097           TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
   2098   unsigned ResultReg =
   2099       fastEmitInst_rr(Opcode, RC, ImplicitDefReg, true, OpReg, false);
   2100   updateValueMap(I, ResultReg);
   2101   return true;
   2102 }
   2103 
   2104 // Helper method used by X86SelectFPExt and X86SelectFPTrunc.
   2105 bool X86FastISel::X86SelectFPExtOrFPTrunc(const Instruction *I,
   2106                                           unsigned TargetOpc,
   2107                                           const TargetRegisterClass *RC) {
   2108   assert((I->getOpcode() == Instruction::FPExt ||
   2109           I->getOpcode() == Instruction::FPTrunc) &&
   2110          "Instruction must be an FPExt or FPTrunc!");
   2111 
   2112   unsigned OpReg = getRegForValue(I->getOperand(0));
   2113   if (OpReg == 0)
   2114     return false;
   2115 
   2116   unsigned ResultReg = createResultReg(RC);
   2117   MachineInstrBuilder MIB;
   2118   MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TargetOpc),
   2119                 ResultReg);
   2120   if (Subtarget->hasAVX())
   2121     MIB.addReg(OpReg);
   2122   MIB.addReg(OpReg);
   2123   updateValueMap(I, ResultReg);
   2124   return true;
   2125 }
   2126 
   2127 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
   2128   if (X86ScalarSSEf64 && I->getType()->isDoubleTy() &&
   2129       I->getOperand(0)->getType()->isFloatTy()) {
   2130     // fpext from float to double.
   2131     unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSS2SDrr : X86::CVTSS2SDrr;
   2132     return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR64RegClass);
   2133   }
   2134 
   2135   return false;
   2136 }
   2137 
   2138 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
   2139   if (X86ScalarSSEf64 && I->getType()->isFloatTy() &&
   2140       I->getOperand(0)->getType()->isDoubleTy()) {
   2141     // fptrunc from double to float.
   2142     unsigned Opc = Subtarget->hasAVX() ? X86::VCVTSD2SSrr : X86::CVTSD2SSrr;
   2143     return X86SelectFPExtOrFPTrunc(I, Opc, &X86::FR32RegClass);
   2144   }
   2145 
   2146   return false;
   2147 }
   2148 
   2149 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
   2150   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   2151   EVT DstVT = TLI.getValueType(I->getType());
   2152 
   2153   // This code only handles truncation to byte.
   2154   if (DstVT != MVT::i8 && DstVT != MVT::i1)
   2155     return false;
   2156   if (!TLI.isTypeLegal(SrcVT))
   2157     return false;
   2158 
   2159   unsigned InputReg = getRegForValue(I->getOperand(0));
   2160   if (!InputReg)
   2161     // Unhandled operand.  Halt "fast" selection and bail.
   2162     return false;
   2163 
   2164   if (SrcVT == MVT::i8) {
   2165     // Truncate from i8 to i1; no code needed.
   2166     updateValueMap(I, InputReg);
   2167     return true;
   2168   }
   2169 
   2170   if (!Subtarget->is64Bit()) {
   2171     // If we're on x86-32, we can't extract an i8 from a general register.
   2172     // First issue a copy to GR16_ABCD or GR32_ABCD.
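            // (In 32-bit mode only EAX, EBX, ECX and EDX have an addressable
            // low 8-bit sub-register, hence the ABCD register classes.)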
   2173     const TargetRegisterClass *CopyRC =
   2174       (SrcVT == MVT::i16) ? &X86::GR16_ABCDRegClass : &X86::GR32_ABCDRegClass;
   2175     unsigned CopyReg = createResultReg(CopyRC);
   2176     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2177             TII.get(TargetOpcode::COPY), CopyReg).addReg(InputReg);
   2178     InputReg = CopyReg;
   2179   }
   2180 
   2181   // Issue an extract_subreg.
   2182   unsigned ResultReg = fastEmitInst_extractsubreg(MVT::i8,
   2183                                                   InputReg, /*Kill=*/true,
   2184                                                   X86::sub_8bit);
   2185   if (!ResultReg)
   2186     return false;
   2187 
   2188   updateValueMap(I, ResultReg);
   2189   return true;
   2190 }
   2191 
   2192 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
   2193   return Len <= (Subtarget->is64Bit() ? 32 : 16);
   2194 }
   2195 
   2196 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
   2197                                      X86AddressMode SrcAM, uint64_t Len) {
   2198 
   2199   // Make sure we don't bloat code by inlining very large memcpy's.
   2200   if (!IsMemcpySmall(Len))
   2201     return false;
   2202 
   2203   bool i64Legal = Subtarget->is64Bit();
   2204 
   2205   // We don't care about alignment here since we just emit integer accesses.
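          // e.g. Len == 13 on x86-64 is copied as one i64, one i32 and one i8
          // load/store pair (8 + 4 + 1 bytes).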
   2206   while (Len) {
   2207     MVT VT;
   2208     if (Len >= 8 && i64Legal)
   2209       VT = MVT::i64;
   2210     else if (Len >= 4)
   2211       VT = MVT::i32;
   2212     else if (Len >= 2)
   2213       VT = MVT::i16;
   2214     else
   2215       VT = MVT::i8;
   2216 
   2217     unsigned Reg;
   2218     bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg);
   2219     RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM);
   2220     assert(RV && "Failed to emit load or store??");
   2221 
   2222     unsigned Size = VT.getSizeInBits()/8;
   2223     Len -= Size;
   2224     DestAM.Disp += Size;
   2225     SrcAM.Disp += Size;
   2226   }
   2227 
   2228   return true;
   2229 }
   2230 
   2231 bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
   2232   // FIXME: Handle more intrinsics.
   2233   switch (II->getIntrinsicID()) {
   2234   default: return false;
   2235   case Intrinsic::convert_from_fp16:
   2236   case Intrinsic::convert_to_fp16: {
   2237     if (TM.Options.UseSoftFloat || !Subtarget->hasF16C())
   2238       return false;
   2239 
   2240     const Value *Op = II->getArgOperand(0);
   2241     unsigned InputReg = getRegForValue(Op);
   2242     if (InputReg == 0)
   2243       return false;
   2244 
   2245     // F16C only allows converting from float to half and from half to float.
   2246     bool IsFloatToHalf = II->getIntrinsicID() == Intrinsic::convert_to_fp16;
   2247     if (IsFloatToHalf) {
   2248       if (!Op->getType()->isFloatTy())
   2249         return false;
   2250     } else {
   2251       if (!II->getType()->isFloatTy())
   2252         return false;
   2253     }
   2254 
   2255     unsigned ResultReg = 0;
   2256     const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::v8i16);
   2257     if (IsFloatToHalf) {
   2258       // 'InputReg' is implicitly promoted from register class FR32 to
   2259       // register class VR128 by method 'constrainOperandRegClass' which is
   2260       // directly called by 'fastEmitInst_ri'.
   2261       // Instruction VCVTPS2PHrr takes an extra immediate operand which is
   2262       // used to provide rounding control.
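              // (An immediate of 0 selects round-to-nearest-even rounding rather
              // than the rounding mode currently set in MXCSR.)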
   2263       InputReg = fastEmitInst_ri(X86::VCVTPS2PHrr, RC, InputReg, false, 0);
   2264 
   2265       // Move the lower 32 bits of InputReg into a new register of class GR32.
   2266       ResultReg = createResultReg(&X86::GR32RegClass);
   2267       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2268               TII.get(X86::VMOVPDI2DIrr), ResultReg)
   2269           .addReg(InputReg, RegState::Kill);
   2270 
   2271       // The result value is in the lower 16-bits of ResultReg.
   2272       unsigned RegIdx = X86::sub_16bit;
   2273       ResultReg = fastEmitInst_extractsubreg(MVT::i16, ResultReg, true, RegIdx);
   2274     } else {
   2275       assert(Op->getType()->isIntegerTy(16) && "Expected a 16-bit integer!");
   2276       // Explicitly sign-extend the input to 32-bit.
   2277       InputReg = fastEmit_r(MVT::i16, MVT::i32, ISD::SIGN_EXTEND, InputReg,
   2278                             /*Kill=*/false);
   2279 
   2280       // The following SCALAR_TO_VECTOR will be expanded into a VMOVDI2PDIrr.
   2281       InputReg = fastEmit_r(MVT::i32, MVT::v4i32, ISD::SCALAR_TO_VECTOR,
   2282                             InputReg, /*Kill=*/true);
   2283 
   2284       InputReg = fastEmitInst_r(X86::VCVTPH2PSrr, RC, InputReg, /*Kill=*/true);
   2285 
   2286       // The result value is in the lower 32-bits of ResultReg.
   2287       // Emit an explicit copy from register class VR128 to register class FR32.
   2288       ResultReg = createResultReg(&X86::FR32RegClass);
   2289       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2290               TII.get(TargetOpcode::COPY), ResultReg)
   2291           .addReg(InputReg, RegState::Kill);
   2292     }
   2293 
   2294     updateValueMap(II, ResultReg);
   2295     return true;
   2296   }
   2297   case Intrinsic::frameaddress: {
   2298     MachineFunction *MF = FuncInfo.MF;
   2299     if (MF->getTarget().getMCAsmInfo()->usesWindowsCFI())
   2300       return false;
   2301 
   2302     Type *RetTy = II->getCalledFunction()->getReturnType();
   2303 
   2304     MVT VT;
   2305     if (!isTypeLegal(RetTy, VT))
   2306       return false;
   2307 
   2308     unsigned Opc;
   2309     const TargetRegisterClass *RC = nullptr;
   2310 
   2311     switch (VT.SimpleTy) {
   2312     default: llvm_unreachable("Invalid result type for frameaddress.");
   2313     case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break;
   2314     case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break;
   2315     }
   2316 
   2317     // This needs to be set before we call getPtrSizedFrameRegister, otherwise
   2318     // we get the wrong frame register.
   2319     MachineFrameInfo *MFI = MF->getFrameInfo();
   2320     MFI->setFrameAddressIsTaken(true);
   2321 
   2322     const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
   2323     unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
   2324     assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
   2325             (FrameReg == X86::EBP && VT == MVT::i32)) &&
   2326            "Invalid Frame Register!");
   2327 
   2328     // Always make a copy of the frame register into a vreg first, so that we
   2329     // never directly reference the frame register (the TwoAddressInstruction-
   2330     // Pass doesn't like that).
   2331     unsigned SrcReg = createResultReg(RC);
   2332     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2333             TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg);
   2334 
   2335     // Now recursively load from the frame address.
   2336     // movq (%rbp), %rax
   2337     // movq (%rax), %rax
   2338     // movq (%rax), %rax
   2339     // ...
   2340     unsigned DestReg;
   2341     unsigned Depth = cast<ConstantInt>(II->getOperand(0))->getZExtValue();
   2342     while (Depth--) {
   2343       DestReg = createResultReg(RC);
   2344       addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2345                            TII.get(Opc), DestReg), SrcReg);
   2346       SrcReg = DestReg;
   2347     }
   2348 
   2349     updateValueMap(II, SrcReg);
   2350     return true;
   2351   }
   2352   case Intrinsic::memcpy: {
   2353     const MemCpyInst *MCI = cast<MemCpyInst>(II);
   2354     // Don't handle volatile or variable length memcpys.
   2355     if (MCI->isVolatile())
   2356       return false;
   2357 
   2358     if (isa<ConstantInt>(MCI->getLength())) {
   2359       // Small memcpy's are common enough that we want to do them
   2360       // without a call if possible.
   2361       uint64_t Len = cast<ConstantInt>(MCI->getLength())->getZExtValue();
   2362       if (IsMemcpySmall(Len)) {
   2363         X86AddressMode DestAM, SrcAM;
   2364         if (!X86SelectAddress(MCI->getRawDest(), DestAM) ||
   2365             !X86SelectAddress(MCI->getRawSource(), SrcAM))
   2366           return false;
   2367         TryEmitSmallMemcpy(DestAM, SrcAM, Len);
   2368         return true;
   2369       }
   2370     }
   2371 
   2372     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
   2373     if (!MCI->getLength()->getType()->isIntegerTy(SizeWidth))
   2374       return false;
   2375 
   2376     if (MCI->getSourceAddressSpace() > 255 || MCI->getDestAddressSpace() > 255)
   2377       return false;
   2378 
   2379     return lowerCallTo(II, "memcpy", II->getNumArgOperands() - 2);
   2380   }
   2381   case Intrinsic::memset: {
   2382     const MemSetInst *MSI = cast<MemSetInst>(II);
   2383 
   2384     if (MSI->isVolatile())
   2385       return false;
   2386 
   2387     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
   2388     if (!MSI->getLength()->getType()->isIntegerTy(SizeWidth))
   2389       return false;
   2390 
   2391     if (MSI->getDestAddressSpace() > 255)
   2392       return false;
   2393 
   2394     return lowerCallTo(II, "memset", II->getNumArgOperands() - 2);
   2395   }
   2396   case Intrinsic::stackprotector: {
   2397     // Emit code to store the stack guard onto the stack.
   2398     EVT PtrTy = TLI.getPointerTy();
   2399 
   2400     const Value *Op1 = II->getArgOperand(0); // The guard's value.
   2401     const AllocaInst *Slot = cast<AllocaInst>(II->getArgOperand(1));
   2402 
   2403     MFI.setStackProtectorIndex(FuncInfo.StaticAllocaMap[Slot]);
   2404 
   2405     // Grab the frame index.
   2406     X86AddressMode AM;
   2407     if (!X86SelectAddress(Slot, AM)) return false;
   2408     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
   2409     return true;
   2410   }
   2411   case Intrinsic::dbg_declare: {
   2412     const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
   2413     X86AddressMode AM;
   2414     assert(DI->getAddress() && "Null address should be checked earlier!");
   2415     if (!X86SelectAddress(DI->getAddress(), AM))
   2416       return false;
   2417     const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
   2418     // FIXME may need to add RegState::Debug to any registers produced,
   2419     // although ESP/EBP should be the only ones at the moment.
   2420     assert(DI->getVariable()->isValidLocationForIntrinsic(DbgLoc) &&
   2421            "Expected inlined-at fields to agree");
   2422     addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II), AM)
   2423         .addImm(0)
   2424         .addMetadata(DI->getVariable())
   2425         .addMetadata(DI->getExpression());
   2426     return true;
   2427   }
   2428   case Intrinsic::trap: {
   2429     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP));
   2430     return true;
   2431   }
   2432   case Intrinsic::sqrt: {
   2433     if (!Subtarget->hasSSE1())
   2434       return false;
   2435 
   2436     Type *RetTy = II->getCalledFunction()->getReturnType();
   2437 
   2438     MVT VT;
   2439     if (!isTypeLegal(RetTy, VT))
   2440       return false;
   2441 
   2442     // Unfortunately we can't use fastEmit_r, because the AVX version of FSQRT
   2443     // is not generated by FastISel yet.
   2444     // FIXME: Update this code once tablegen can handle it.
   2445     static const unsigned SqrtOpc[2][2] = {
   2446       {X86::SQRTSSr, X86::VSQRTSSr},
   2447       {X86::SQRTSDr, X86::VSQRTSDr}
   2448     };
   2449     bool HasAVX = Subtarget->hasAVX();
   2450     unsigned Opc;
   2451     const TargetRegisterClass *RC;
   2452     switch (VT.SimpleTy) {
   2453     default: return false;
   2454     case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break;
   2455     case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break;
   2456     }
   2457 
   2458     const Value *SrcVal = II->getArgOperand(0);
   2459     unsigned SrcReg = getRegForValue(SrcVal);
   2460 
   2461     if (SrcReg == 0)
   2462       return false;
   2463 
   2464     unsigned ImplicitDefReg = 0;
   2465     if (HasAVX) {
   2466       ImplicitDefReg = createResultReg(RC);
   2467       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2468               TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
   2469     }
   2470 
   2471     unsigned ResultReg = createResultReg(RC);
   2472     MachineInstrBuilder MIB;
   2473     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
   2474                   ResultReg);
   2475 
   2476     if (ImplicitDefReg)
   2477       MIB.addReg(ImplicitDefReg);
   2478 
   2479     MIB.addReg(SrcReg);
   2480 
   2481     updateValueMap(II, ResultReg);
   2482     return true;
   2483   }
   2484   case Intrinsic::sadd_with_overflow:
   2485   case Intrinsic::uadd_with_overflow:
   2486   case Intrinsic::ssub_with_overflow:
   2487   case Intrinsic::usub_with_overflow:
   2488   case Intrinsic::smul_with_overflow:
   2489   case Intrinsic::umul_with_overflow: {
   2490     // This implements the basic lowering of the xalu with overflow intrinsics
   2491     // into add/sub/mul followed by either seto or setb.
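            // e.g. (illustrative) llvm.uadd.with.overflow.i32 becomes roughly:
            //   addl %rhs, %lhs ; setb %ovf
            // while the signed variants use seto instead.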
   2492     const Function *Callee = II->getCalledFunction();
   2493     auto *Ty = cast<StructType>(Callee->getReturnType());
   2494     Type *RetTy = Ty->getTypeAtIndex(0U);
   2495     Type *CondTy = Ty->getTypeAtIndex(1);
   2496 
   2497     MVT VT;
   2498     if (!isTypeLegal(RetTy, VT))
   2499       return false;
   2500 
   2501     if (VT < MVT::i8 || VT > MVT::i64)
   2502       return false;
   2503 
   2504     const Value *LHS = II->getArgOperand(0);
   2505     const Value *RHS = II->getArgOperand(1);
   2506 
   2507     // Canonicalize immediate to the RHS.
   2508     if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) &&
   2509         isCommutativeIntrinsic(II))
   2510       std::swap(LHS, RHS);
   2511 
   2512     bool UseIncDec = false;
   2513     if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
   2514       UseIncDec = true;
   2515 
   2516     unsigned BaseOpc, CondOpc;
   2517     switch (II->getIntrinsicID()) {
   2518     default: llvm_unreachable("Unexpected intrinsic!");
   2519     case Intrinsic::sadd_with_overflow:
   2520       BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
   2521       CondOpc = X86::SETOr;
   2522       break;
   2523     case Intrinsic::uadd_with_overflow:
   2524       BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
   2525     case Intrinsic::ssub_with_overflow:
   2526       BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
   2527       CondOpc = X86::SETOr;
   2528       break;
   2529     case Intrinsic::usub_with_overflow:
   2530       BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
   2531     case Intrinsic::smul_with_overflow:
   2532       BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break;
   2533     case Intrinsic::umul_with_overflow:
   2534       BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break;
   2535     }
   2536 
   2537     unsigned LHSReg = getRegForValue(LHS);
   2538     if (LHSReg == 0)
   2539       return false;
   2540     bool LHSIsKill = hasTrivialKill(LHS);
   2541 
   2542     unsigned ResultReg = 0;
   2543     // Check if we have an immediate version.
   2544     if (const auto *CI = dyn_cast<ConstantInt>(RHS)) {
   2545       static const unsigned Opc[2][4] = {
   2546         { X86::INC8r, X86::INC16r, X86::INC32r, X86::INC64r },
   2547         { X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
   2548       };
   2549 
   2550       if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
   2551         ResultReg = createResultReg(TLI.getRegClassFor(VT));
   2552         bool IsDec = BaseOpc == X86ISD::DEC;
   2553         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2554                 TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
   2555           .addReg(LHSReg, getKillRegState(LHSIsKill));
   2556       } else
   2557         ResultReg = fastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill,
   2558                                 CI->getZExtValue());
   2559     }
   2560 
   2561     unsigned RHSReg;
   2562     bool RHSIsKill;
   2563     if (!ResultReg) {
   2564       RHSReg = getRegForValue(RHS);
   2565       if (RHSReg == 0)
   2566         return false;
   2567       RHSIsKill = hasTrivialKill(RHS);
   2568       ResultReg = fastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg,
   2569                               RHSIsKill);
   2570     }
   2571 
   2572     // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit
   2573     // it manually.
   2574     if (BaseOpc == X86ISD::UMUL && !ResultReg) {
   2575       static const unsigned MULOpc[] =
   2576         { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r };
   2577       static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX };
   2578       // First copy the first operand into AL/AX/EAX/RAX (depending on the
   2579       // type), which is an implicit input to the X86::MUL*r instruction.
   2580       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2581               TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8])
   2582         .addReg(LHSReg, getKillRegState(LHSIsKill));
   2583       ResultReg = fastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8],
   2584                                  TLI.getRegClassFor(VT), RHSReg, RHSIsKill);
   2585     } else if (BaseOpc == X86ISD::SMUL && !ResultReg) {
   2586       static const unsigned MULOpc[] =
   2587         { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr };
   2588       if (VT == MVT::i8) {
   2589         // Copy the first operand into AL, which is an implicit input to the
   2590         // X86::IMUL8r instruction.
   2591         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2592                TII.get(TargetOpcode::COPY), X86::AL)
   2593           .addReg(LHSReg, getKillRegState(LHSIsKill));
   2594         ResultReg = fastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg,
   2595                                    RHSIsKill);
   2596       } else
   2597         ResultReg = fastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8],
   2598                                     TLI.getRegClassFor(VT), LHSReg, LHSIsKill,
   2599                                     RHSReg, RHSIsKill);
   2600     }
   2601 
   2602     if (!ResultReg)
   2603       return false;
   2604 
   2605     unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
   2606     assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
   2607     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
   2608             ResultReg2);
   2609 
   2610     updateValueMap(II, ResultReg, 2);
   2611     return true;
   2612   }
   2613   case Intrinsic::x86_sse_cvttss2si:
   2614   case Intrinsic::x86_sse_cvttss2si64:
   2615   case Intrinsic::x86_sse2_cvttsd2si:
   2616   case Intrinsic::x86_sse2_cvttsd2si64: {
   2617     bool IsInputDouble;
   2618     switch (II->getIntrinsicID()) {
   2619     default: llvm_unreachable("Unexpected intrinsic.");
   2620     case Intrinsic::x86_sse_cvttss2si:
   2621     case Intrinsic::x86_sse_cvttss2si64:
   2622       if (!Subtarget->hasSSE1())
   2623         return false;
   2624       IsInputDouble = false;
   2625       break;
   2626     case Intrinsic::x86_sse2_cvttsd2si:
   2627     case Intrinsic::x86_sse2_cvttsd2si64:
   2628       if (!Subtarget->hasSSE2())
   2629         return false;
   2630       IsInputDouble = true;
   2631       break;
   2632     }
   2633 
   2634     Type *RetTy = II->getCalledFunction()->getReturnType();
   2635     MVT VT;
   2636     if (!isTypeLegal(RetTy, VT))
   2637       return false;
   2638 
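             // CvtOpc is indexed as [IsInputDouble][IsI64Result][HasAVX]: source type
             // (float vs. double), result width (i32 vs. i64), and SSE vs. AVX encoding.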
   2639     static const unsigned CvtOpc[2][2][2] = {
   2640       { { X86::CVTTSS2SIrr,   X86::VCVTTSS2SIrr   },
   2641         { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr }  },
   2642       { { X86::CVTTSD2SIrr,   X86::VCVTTSD2SIrr   },
   2643         { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr }  }
   2644     };
   2645     bool HasAVX = Subtarget->hasAVX();
   2646     unsigned Opc;
   2647     switch (VT.SimpleTy) {
   2648     default: llvm_unreachable("Unexpected result type.");
   2649     case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break;
   2650     case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break;
   2651     }
   2652 
   2653     // Check if we can fold insertelement instructions into the convert.
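             // Only lane 0 feeds the scalar convert, so inserts into other lanes can be
             // skipped; an insert into lane 0 lets us use the inserted scalar directly.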
   2654     const Value *Op = II->getArgOperand(0);
   2655     while (auto *IE = dyn_cast<InsertElementInst>(Op)) {
   2656       const Value *Index = IE->getOperand(2);
   2657       if (!isa<ConstantInt>(Index))
   2658         break;
   2659       unsigned Idx = cast<ConstantInt>(Index)->getZExtValue();
   2660 
   2661       if (Idx == 0) {
   2662         Op = IE->getOperand(1);
   2663         break;
   2664       }
   2665       Op = IE->getOperand(0);
   2666     }
   2667 
   2668     unsigned Reg = getRegForValue(Op);
   2669     if (Reg == 0)
   2670       return false;
   2671 
   2672     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   2673     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
   2674       .addReg(Reg);
   2675 
   2676     updateValueMap(II, ResultReg);
   2677     return true;
   2678   }
   2679   }
   2680 }
   2681 
   2682 bool X86FastISel::fastLowerArguments() {
   2683   if (!FuncInfo.CanLowerReturn)
   2684     return false;
   2685 
   2686   const Function *F = FuncInfo.Fn;
   2687   if (F->isVarArg())
   2688     return false;
   2689 
   2690   CallingConv::ID CC = F->getCallingConv();
   2691   if (CC != CallingConv::C)
   2692     return false;
   2693 
   2694   if (Subtarget->isCallingConvWin64(CC))
   2695     return false;
   2696 
   2697   if (!Subtarget->is64Bit())
   2698     return false;
   2699 
    2700   // Only handle simple cases, i.e. up to 6 i32/i64 and 8 f32/f64 scalar arguments.
   2701   unsigned GPRCnt = 0;
   2702   unsigned FPRCnt = 0;
   2703   unsigned Idx = 0;
   2704   for (auto const &Arg : F->args()) {
   2705     // The first argument is at index 1.
   2706     ++Idx;
   2707     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
   2708         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
   2709         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
   2710         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
   2711       return false;
   2712 
   2713     Type *ArgTy = Arg.getType();
   2714     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
   2715       return false;
   2716 
   2717     EVT ArgVT = TLI.getValueType(ArgTy);
   2718     if (!ArgVT.isSimple()) return false;
   2719     switch (ArgVT.getSimpleVT().SimpleTy) {
   2720     default: return false;
   2721     case MVT::i32:
   2722     case MVT::i64:
   2723       ++GPRCnt;
   2724       break;
   2725     case MVT::f32:
   2726     case MVT::f64:
   2727       if (!Subtarget->hasSSE1())
   2728         return false;
   2729       ++FPRCnt;
   2730       break;
   2731     }
   2732 
   2733     if (GPRCnt > 6)
   2734       return false;
   2735 
   2736     if (FPRCnt > 8)
   2737       return false;
   2738   }
   2739 
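           // These are the SysV AMD64 integer and SSE argument registers, in order;
           // the Win64 convention was already rejected above.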
   2740   static const MCPhysReg GPR32ArgRegs[] = {
   2741     X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
   2742   };
   2743   static const MCPhysReg GPR64ArgRegs[] = {
   2744     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
   2745   };
   2746   static const MCPhysReg XMMArgRegs[] = {
   2747     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
   2748     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
   2749   };
   2750 
   2751   unsigned GPRIdx = 0;
   2752   unsigned FPRIdx = 0;
   2753   for (auto const &Arg : F->args()) {
   2754     MVT VT = TLI.getSimpleValueType(Arg.getType());
   2755     const TargetRegisterClass *RC = TLI.getRegClassFor(VT);
   2756     unsigned SrcReg;
   2757     switch (VT.SimpleTy) {
   2758     default: llvm_unreachable("Unexpected value type.");
   2759     case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
   2760     case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
   2761     case MVT::f32: // fall-through
   2762     case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
   2763     }
   2764     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
   2765     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
   2766     // Without this, EmitLiveInCopies may eliminate the livein if its only
   2767     // use is a bitcast (which isn't turned into an instruction).
   2768     unsigned ResultReg = createResultReg(RC);
   2769     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2770             TII.get(TargetOpcode::COPY), ResultReg)
   2771       .addReg(DstReg, getKillRegState(true));
   2772     updateValueMap(&Arg, ResultReg);
   2773   }
   2774   return true;
   2775 }
   2776 
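         /// Return the number of bytes the callee pops on return. Under the 32-bit
         /// C-like conventions on non-MSVCRT targets, a callee pops the hidden 4-byte
         /// sret pointer; in every other case handled here it pops nothing.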
   2777 static unsigned computeBytesPoppedByCallee(const X86Subtarget *Subtarget,
   2778                                            CallingConv::ID CC,
   2779                                            ImmutableCallSite *CS) {
   2780   if (Subtarget->is64Bit())
   2781     return 0;
   2782   if (Subtarget->getTargetTriple().isOSMSVCRT())
   2783     return 0;
   2784   if (CC == CallingConv::Fast || CC == CallingConv::GHC ||
   2785       CC == CallingConv::HiPE)
   2786     return 0;
   2787   if (CS && !CS->paramHasAttr(1, Attribute::StructRet))
   2788     return 0;
   2789   if (CS && CS->paramHasAttr(1, Attribute::InReg))
   2790     return 0;
   2791   return 4;
   2792 }
   2793 
   2794 bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
   2795   auto &OutVals       = CLI.OutVals;
   2796   auto &OutFlags      = CLI.OutFlags;
   2797   auto &OutRegs       = CLI.OutRegs;
   2798   auto &Ins           = CLI.Ins;
   2799   auto &InRegs        = CLI.InRegs;
   2800   CallingConv::ID CC  = CLI.CallConv;
   2801   bool &IsTailCall    = CLI.IsTailCall;
   2802   bool IsVarArg       = CLI.IsVarArg;
   2803   const Value *Callee = CLI.Callee;
   2804   const char *SymName = CLI.SymName;
   2805 
   2806   bool Is64Bit        = Subtarget->is64Bit();
   2807   bool IsWin64        = Subtarget->isCallingConvWin64(CC);
   2808 
   2809   // Handle only C, fastcc, and webkit_js calling conventions for now.
   2810   switch (CC) {
   2811   default: return false;
   2812   case CallingConv::C:
   2813   case CallingConv::Fast:
   2814   case CallingConv::WebKit_JS:
   2815   case CallingConv::X86_FastCall:
   2816   case CallingConv::X86_64_Win64:
   2817   case CallingConv::X86_64_SysV:
   2818     break;
   2819   }
   2820 
   2821   // Allow SelectionDAG isel to handle tail calls.
   2822   if (IsTailCall)
   2823     return false;
   2824 
   2825   // fastcc with -tailcallopt is intended to provide a guaranteed
    2826   // tail call optimization. FastISel doesn't know how to do that.
   2827   if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
   2828     return false;
   2829 
    2830   // Don't know how to handle Win64 varargs yet. Nothing special is needed for
    2831   // x86-32, and the x86-64 SysV varargs case is handled below.
   2832   if (IsVarArg && IsWin64)
   2833     return false;
   2834 
   2835   // Don't know about inalloca yet.
   2836   if (CLI.CS && CLI.CS->hasInAllocaArgument())
   2837     return false;
   2838 
   2839   // Fast-isel doesn't know about callee-pop yet.
   2840   if (X86::isCalleePop(CC, Subtarget->is64Bit(), IsVarArg,
   2841                        TM.Options.GuaranteedTailCallOpt))
   2842     return false;
   2843 
   2844   SmallVector<MVT, 16> OutVTs;
   2845   SmallVector<unsigned, 16> ArgRegs;
   2846 
    2847   // If this is a constant i1/i8/i16 argument, promote it to i32 to avoid an
    2848   // extra instruction. This is safe because the promotion is common to all of
    2849   // the FastISel-supported calling conventions on x86.
   2850   for (int i = 0, e = OutVals.size(); i != e; ++i) {
   2851     Value *&Val = OutVals[i];
   2852     ISD::ArgFlagsTy Flags = OutFlags[i];
   2853     if (auto *CI = dyn_cast<ConstantInt>(Val)) {
   2854       if (CI->getBitWidth() < 32) {
   2855         if (Flags.isSExt())
   2856           Val = ConstantExpr::getSExt(CI, Type::getInt32Ty(CI->getContext()));
   2857         else
   2858           Val = ConstantExpr::getZExt(CI, Type::getInt32Ty(CI->getContext()));
   2859       }
   2860     }
   2861 
   2862     // Passing bools around ends up doing a trunc to i1 and passing it.
   2863     // Codegen this as an argument + "and 1".
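             // For example, '%b = trunc i32 %x to i1' used as a bool argument becomes an
             // 'and' of %x's register with 1, and that result is what gets passed.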
   2864     MVT VT;
   2865     auto *TI = dyn_cast<TruncInst>(Val);
   2866     unsigned ResultReg;
   2867     if (TI && TI->getType()->isIntegerTy(1) && CLI.CS &&
   2868               (TI->getParent() == CLI.CS->getInstruction()->getParent()) &&
   2869               TI->hasOneUse()) {
   2870       Value *PrevVal = TI->getOperand(0);
   2871       ResultReg = getRegForValue(PrevVal);
   2872 
   2873       if (!ResultReg)
   2874         return false;
   2875 
   2876       if (!isTypeLegal(PrevVal->getType(), VT))
   2877         return false;
   2878 
   2879       ResultReg =
   2880         fastEmit_ri(VT, VT, ISD::AND, ResultReg, hasTrivialKill(PrevVal), 1);
   2881     } else {
   2882       if (!isTypeLegal(Val->getType(), VT))
   2883         return false;
   2884       ResultReg = getRegForValue(Val);
   2885     }
   2886 
   2887     if (!ResultReg)
   2888       return false;
   2889 
   2890     ArgRegs.push_back(ResultReg);
   2891     OutVTs.push_back(VT);
   2892   }
   2893 
   2894   // Analyze operands of the call, assigning locations to each operand.
   2895   SmallVector<CCValAssign, 16> ArgLocs;
   2896   CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, ArgLocs, CLI.RetTy->getContext());
   2897 
   2898   // Allocate shadow area for Win64
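           // (32 bytes of home space for the four register parameters, 8-byte aligned).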
   2899   if (IsWin64)
   2900     CCInfo.AllocateStack(32, 8);
   2901 
   2902   CCInfo.AnalyzeCallOperands(OutVTs, OutFlags, CC_X86);
   2903 
   2904   // Get a count of how many bytes are to be pushed on the stack.
   2905   unsigned NumBytes = CCInfo.getNextStackOffset();
   2906 
   2907   // Issue CALLSEQ_START
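           // (this frame-setup pseudo is later turned into the actual stack-pointer
           // adjustment, if any, when call-frame pseudos are eliminated).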
   2908   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   2909   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown))
   2910     .addImm(NumBytes).addImm(0);
   2911 
   2912   // Walk the register/memloc assignments, inserting copies/loads.
   2913   const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
   2914   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   2915     CCValAssign const &VA = ArgLocs[i];
   2916     const Value *ArgVal = OutVals[VA.getValNo()];
   2917     MVT ArgVT = OutVTs[VA.getValNo()];
   2918 
   2919     if (ArgVT == MVT::x86mmx)
   2920       return false;
   2921 
   2922     unsigned ArgReg = ArgRegs[VA.getValNo()];
   2923 
   2924     // Promote the value if needed.
   2925     switch (VA.getLocInfo()) {
   2926     case CCValAssign::Full: break;
   2927     case CCValAssign::SExt: {
   2928       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
   2929              "Unexpected extend");
   2930       bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
   2931                                        ArgVT, ArgReg);
   2932       assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
   2933       ArgVT = VA.getLocVT();
   2934       break;
   2935     }
   2936     case CCValAssign::ZExt: {
   2937       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
   2938              "Unexpected extend");
   2939       bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
   2940                                        ArgVT, ArgReg);
   2941       assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
   2942       ArgVT = VA.getLocVT();
   2943       break;
   2944     }
   2945     case CCValAssign::AExt: {
   2946       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
   2947              "Unexpected extend");
   2948       bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), ArgReg,
   2949                                        ArgVT, ArgReg);
   2950       if (!Emitted)
   2951         Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), ArgReg,
   2952                                     ArgVT, ArgReg);
   2953       if (!Emitted)
   2954         Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
   2955                                     ArgVT, ArgReg);
   2956 
    2957       assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
   2958       ArgVT = VA.getLocVT();
   2959       break;
   2960     }
   2961     case CCValAssign::BCvt: {
   2962       ArgReg = fastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, ArgReg,
   2963                           /*TODO: Kill=*/false);
   2964       assert(ArgReg && "Failed to emit a bitcast!");
   2965       ArgVT = VA.getLocVT();
   2966       break;
   2967     }
   2968     case CCValAssign::VExt:
   2969       // VExt has not been implemented, so this should be impossible to reach
    2970       // for now. However, fall back to SelectionDAG isel once it is implemented.
   2971       return false;
   2972     case CCValAssign::AExtUpper:
   2973     case CCValAssign::SExtUpper:
   2974     case CCValAssign::ZExtUpper:
   2975     case CCValAssign::FPExt:
   2976       llvm_unreachable("Unexpected loc info!");
   2977     case CCValAssign::Indirect:
   2978       // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
   2979       // support this.
   2980       return false;
   2981     }
   2982 
   2983     if (VA.isRegLoc()) {
   2984       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   2985               TII.get(TargetOpcode::COPY), VA.getLocReg()).addReg(ArgReg);
   2986       OutRegs.push_back(VA.getLocReg());
   2987     } else {
   2988       assert(VA.isMemLoc());
   2989 
   2990       // Don't emit stores for undef values.
   2991       if (isa<UndefValue>(ArgVal))
   2992         continue;
   2993 
   2994       unsigned LocMemOffset = VA.getLocMemOffset();
   2995       X86AddressMode AM;
   2996       AM.Base.Reg = RegInfo->getStackRegister();
   2997       AM.Disp = LocMemOffset;
   2998       ISD::ArgFlagsTy Flags = OutFlags[VA.getValNo()];
   2999       unsigned Alignment = DL.getABITypeAlignment(ArgVal->getType());
   3000       MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
   3001         MachinePointerInfo::getStack(LocMemOffset), MachineMemOperand::MOStore,
   3002         ArgVT.getStoreSize(), Alignment);
   3003       if (Flags.isByVal()) {
   3004         X86AddressMode SrcAM;
   3005         SrcAM.Base.Reg = ArgReg;
   3006         if (!TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize()))
   3007           return false;
   3008       } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
   3009         // If this is a really simple value, emit this with the Value* version
   3010         // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
   3011         // as it can cause us to reevaluate the argument.
   3012         if (!X86FastEmitStore(ArgVT, ArgVal, AM, MMO))
   3013           return false;
   3014       } else {
   3015         bool ValIsKill = hasTrivialKill(ArgVal);
   3016         if (!X86FastEmitStore(ArgVT, ArgReg, ValIsKill, AM, MMO))
   3017           return false;
   3018       }
   3019     }
   3020   }
   3021 
    3022   // ELF/PIC requires the GOT pointer to be in the EBX register before making
    3023   // function calls through the PLT.
   3024   if (Subtarget->isPICStyleGOT()) {
   3025     unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
   3026     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3027             TII.get(TargetOpcode::COPY), X86::EBX).addReg(Base);
   3028   }
   3029 
   3030   if (Is64Bit && IsVarArg && !IsWin64) {
   3031     // From AMD64 ABI document:
   3032     // For calls that may call functions that use varargs or stdargs
   3033     // (prototype-less calls or calls to functions containing ellipsis (...) in
   3034     // the declaration) %al is used as hidden argument to specify the number
   3035     // of SSE registers used. The contents of %al do not need to match exactly
    3036     // the number of registers, but must be an upper bound on the number of SSE
    3037     // registers used and must be in the range 0 - 8 inclusive.
   3038 
   3039     // Count the number of XMM registers allocated.
   3040     static const MCPhysReg XMMArgRegs[] = {
   3041       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
   3042       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
   3043     };
   3044     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
   3045     assert((Subtarget->hasSSE1() || !NumXMMRegs)
   3046            && "SSE registers cannot be used when SSE is disabled");
   3047     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri),
   3048             X86::AL).addImm(NumXMMRegs);
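             // For example, a varargs call that passed two doubles in XMM0 and XMM1
             // gets "movb $2, %al" here.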
   3049   }
   3050 
   3051   // Materialize callee address in a register. FIXME: GV address can be
   3052   // handled with a CALLpcrel32 instead.
   3053   X86AddressMode CalleeAM;
   3054   if (!X86SelectCallAddress(Callee, CalleeAM))
   3055     return false;
   3056 
   3057   unsigned CalleeOp = 0;
   3058   const GlobalValue *GV = nullptr;
   3059   if (CalleeAM.GV != nullptr) {
   3060     GV = CalleeAM.GV;
   3061   } else if (CalleeAM.Base.Reg != 0) {
   3062     CalleeOp = CalleeAM.Base.Reg;
   3063   } else
   3064     return false;
   3065 
   3066   // Issue the call.
   3067   MachineInstrBuilder MIB;
   3068   if (CalleeOp) {
   3069     // Register-indirect call.
   3070     unsigned CallOpc = Is64Bit ? X86::CALL64r : X86::CALL32r;
   3071     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc))
   3072       .addReg(CalleeOp);
   3073   } else {
   3074     // Direct call.
   3075     assert(GV && "Not a direct call");
   3076     unsigned CallOpc = Is64Bit ? X86::CALL64pcrel32 : X86::CALLpcrel32;
   3077 
   3078     // See if we need any target-specific flags on the GV operand.
   3079     unsigned char OpFlags = 0;
   3080 
   3081     // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    3082     // external symbols must go through the PLT in PIC mode.  If the symbol
   3083     // has hidden or protected visibility, or if it is static or local, then
   3084     // we don't need to use the PLT - we can directly call it.
   3085     if (Subtarget->isTargetELF() &&
   3086         TM.getRelocationModel() == Reloc::PIC_ &&
   3087         GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
   3088       OpFlags = X86II::MO_PLT;
   3089     } else if (Subtarget->isPICStyleStubAny() &&
   3090                (GV->isDeclaration() || GV->isWeakForLinker()) &&
   3091                (!Subtarget->getTargetTriple().isMacOSX() ||
   3092                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
   3093       // PC-relative references to external symbols should go through $stub,
    3094       // unless we're building with the Leopard linker or later, which
   3095       // automatically synthesizes these stubs.
   3096       OpFlags = X86II::MO_DARWIN_STUB;
   3097     }
   3098 
   3099     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CallOpc));
   3100     if (SymName)
   3101       MIB.addExternalSymbol(SymName, OpFlags);
   3102     else
   3103       MIB.addGlobalAddress(GV, 0, OpFlags);
   3104   }
   3105 
   3106   // Add a register mask operand representing the call-preserved registers.
   3107   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   3108   MIB.addRegMask(TRI.getCallPreservedMask(*FuncInfo.MF, CC));
   3109 
    3110   // Add an implicit use of the GOT pointer in EBX.
   3111   if (Subtarget->isPICStyleGOT())
   3112     MIB.addReg(X86::EBX, RegState::Implicit);
   3113 
   3114   if (Is64Bit && IsVarArg && !IsWin64)
   3115     MIB.addReg(X86::AL, RegState::Implicit);
   3116 
   3117   // Add implicit physical register uses to the call.
   3118   for (auto Reg : OutRegs)
   3119     MIB.addReg(Reg, RegState::Implicit);
   3120 
   3121   // Issue CALLSEQ_END
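           // The pseudo records how much was pushed for this call and how much the
           // callee itself pops, so frame lowering can emit the matching adjustment.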
   3122   unsigned NumBytesForCalleeToPop =
   3123     computeBytesPoppedByCallee(Subtarget, CC, CLI.CS);
   3124   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   3125   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp))
   3126     .addImm(NumBytes).addImm(NumBytesForCalleeToPop);
   3127 
   3128   // Now handle call return values.
   3129   SmallVector<CCValAssign, 16> RVLocs;
   3130   CCState CCRetInfo(CC, IsVarArg, *FuncInfo.MF, RVLocs,
   3131                     CLI.RetTy->getContext());
   3132   CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
   3133 
   3134   // Copy all of the result registers out of their specified physreg.
   3135   unsigned ResultReg = FuncInfo.CreateRegs(CLI.RetTy);
   3136   for (unsigned i = 0; i != RVLocs.size(); ++i) {
   3137     CCValAssign &VA = RVLocs[i];
   3138     EVT CopyVT = VA.getValVT();
   3139     unsigned CopyReg = ResultReg + i;
   3140 
    3141     // If this is x86-64 or an 'inreg' return, and SSE is disabled, we can't return FP values
   3142     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64) &&
   3143         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget->hasSSE1())) {
   3144       report_fatal_error("SSE register return with SSE disabled");
   3145     }
   3146 
   3147     // If we prefer to use the value in xmm registers, copy it out as f80 and
   3148     // use a truncate to move it from fp stack reg to xmm reg.
   3149     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
   3150         isScalarFPTypeInSSEReg(VA.getValVT())) {
   3151       CopyVT = MVT::f80;
   3152       CopyReg = createResultReg(&X86::RFP80RegClass);
   3153     }
   3154 
   3155     // Copy out the result.
   3156     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3157             TII.get(TargetOpcode::COPY), CopyReg).addReg(VA.getLocReg());
   3158     InRegs.push_back(VA.getLocReg());
   3159 
   3160     // Round the f80 to the right size, which also moves it to the appropriate
   3161     // xmm register. This is accomplished by storing the f80 value in memory
   3162     // and then loading it back.
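             // For example, an f32 returned in FP0 is stored with ST_Fp80m32 into a
             // 4-byte stack slot (rounding to f32) and reloaded with MOVSSrm into an
             // SSE register.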
   3163     if (CopyVT != VA.getValVT()) {
   3164       EVT ResVT = VA.getValVT();
   3165       unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
   3166       unsigned MemSize = ResVT.getSizeInBits()/8;
   3167       int FI = MFI.CreateStackObject(MemSize, MemSize, false);
   3168       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3169                                 TII.get(Opc)), FI)
   3170         .addReg(CopyReg);
   3171       Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
   3172       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3173                                 TII.get(Opc), ResultReg + i), FI);
   3174     }
   3175   }
   3176 
   3177   CLI.ResultReg = ResultReg;
   3178   CLI.NumResultRegs = RVLocs.size();
   3179   CLI.Call = MIB;
   3180 
   3181   return true;
   3182 }
   3183 
   3184 bool
   3185 X86FastISel::fastSelectInstruction(const Instruction *I)  {
   3186   switch (I->getOpcode()) {
   3187   default: break;
   3188   case Instruction::Load:
   3189     return X86SelectLoad(I);
   3190   case Instruction::Store:
   3191     return X86SelectStore(I);
   3192   case Instruction::Ret:
   3193     return X86SelectRet(I);
   3194   case Instruction::ICmp:
   3195   case Instruction::FCmp:
   3196     return X86SelectCmp(I);
   3197   case Instruction::ZExt:
   3198     return X86SelectZExt(I);
   3199   case Instruction::Br:
   3200     return X86SelectBranch(I);
   3201   case Instruction::LShr:
   3202   case Instruction::AShr:
   3203   case Instruction::Shl:
   3204     return X86SelectShift(I);
   3205   case Instruction::SDiv:
   3206   case Instruction::UDiv:
   3207   case Instruction::SRem:
   3208   case Instruction::URem:
   3209     return X86SelectDivRem(I);
   3210   case Instruction::Select:
   3211     return X86SelectSelect(I);
   3212   case Instruction::Trunc:
   3213     return X86SelectTrunc(I);
   3214   case Instruction::FPExt:
   3215     return X86SelectFPExt(I);
   3216   case Instruction::FPTrunc:
   3217     return X86SelectFPTrunc(I);
   3218   case Instruction::SIToFP:
   3219     return X86SelectSIToFP(I);
   3220   case Instruction::IntToPtr: // Deliberate fall-through.
   3221   case Instruction::PtrToInt: {
   3222     EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   3223     EVT DstVT = TLI.getValueType(I->getType());
   3224     if (DstVT.bitsGT(SrcVT))
   3225       return X86SelectZExt(I);
   3226     if (DstVT.bitsLT(SrcVT))
   3227       return X86SelectTrunc(I);
   3228     unsigned Reg = getRegForValue(I->getOperand(0));
   3229     if (Reg == 0) return false;
   3230     updateValueMap(I, Reg);
   3231     return true;
   3232   }
   3233   }
   3234 
   3235   return false;
   3236 }
   3237 
   3238 unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
   3239   if (VT > MVT::i64)
   3240     return 0;
   3241 
   3242   uint64_t Imm = CI->getZExtValue();
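           // Zero is materialized with the 32-bit xor idiom (MOV32r0); narrower types
           // take a subregister of it, and i64 is widened with SUBREG_TO_REG since a
           // 32-bit write implicitly zeroes the upper 32 bits.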
   3243   if (Imm == 0) {
   3244     unsigned SrcReg = fastEmitInst_(X86::MOV32r0, &X86::GR32RegClass);
   3245     switch (VT.SimpleTy) {
   3246     default: llvm_unreachable("Unexpected value type");
   3247     case MVT::i1:
   3248     case MVT::i8:
   3249       return fastEmitInst_extractsubreg(MVT::i8, SrcReg, /*Kill=*/true,
   3250                                         X86::sub_8bit);
   3251     case MVT::i16:
   3252       return fastEmitInst_extractsubreg(MVT::i16, SrcReg, /*Kill=*/true,
   3253                                         X86::sub_16bit);
   3254     case MVT::i32:
   3255       return SrcReg;
   3256     case MVT::i64: {
   3257       unsigned ResultReg = createResultReg(&X86::GR64RegClass);
   3258       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3259               TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
   3260         .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
   3261       return ResultReg;
   3262     }
   3263     }
   3264   }
   3265 
   3266   unsigned Opc = 0;
   3267   switch (VT.SimpleTy) {
   3268   default: llvm_unreachable("Unexpected value type");
   3269   case MVT::i1:  VT = MVT::i8; // fall-through
   3270   case MVT::i8:  Opc = X86::MOV8ri;  break;
   3271   case MVT::i16: Opc = X86::MOV16ri; break;
   3272   case MVT::i32: Opc = X86::MOV32ri; break;
   3273   case MVT::i64: {
   3274     if (isUInt<32>(Imm))
   3275       Opc = X86::MOV32ri;
   3276     else if (isInt<32>(Imm))
   3277       Opc = X86::MOV64ri32;
   3278     else
   3279       Opc = X86::MOV64ri;
   3280     break;
   3281   }
   3282   }
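           // A 64-bit constant that fits in an unsigned 32-bit immediate uses the shorter
           // 32-bit move; SUBREG_TO_REG then models the implicit zero-extension into the
           // full 64-bit register.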
   3283   if (VT == MVT::i64 && Opc == X86::MOV32ri) {
   3284     unsigned SrcReg = fastEmitInst_i(Opc, &X86::GR32RegClass, Imm);
   3285     unsigned ResultReg = createResultReg(&X86::GR64RegClass);
   3286     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3287             TII.get(TargetOpcode::SUBREG_TO_REG), ResultReg)
   3288       .addImm(0).addReg(SrcReg).addImm(X86::sub_32bit);
   3289     return ResultReg;
   3290   }
   3291   return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm);
   3292 }
   3293 
   3294 unsigned X86FastISel::X86MaterializeFP(const ConstantFP *CFP, MVT VT) {
   3295   if (CFP->isNullValue())
   3296     return fastMaterializeFloatZero(CFP);
   3297 
    3298   // Only the small and large code models are handled here.
   3299   CodeModel::Model CM = TM.getCodeModel();
   3300   if (CM != CodeModel::Small && CM != CodeModel::Large)
   3301     return 0;
   3302 
   3303   // Get opcode and regclass of the output for the given load instruction.
   3304   unsigned Opc = 0;
   3305   const TargetRegisterClass *RC = nullptr;
   3306   switch (VT.SimpleTy) {
   3307   default: return 0;
   3308   case MVT::f32:
   3309     if (X86ScalarSSEf32) {
   3310       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
   3311       RC  = &X86::FR32RegClass;
   3312     } else {
   3313       Opc = X86::LD_Fp32m;
   3314       RC  = &X86::RFP32RegClass;
   3315     }
   3316     break;
   3317   case MVT::f64:
   3318     if (X86ScalarSSEf64) {
   3319       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
   3320       RC  = &X86::FR64RegClass;
   3321     } else {
   3322       Opc = X86::LD_Fp64m;
   3323       RC  = &X86::RFP64RegClass;
   3324     }
   3325     break;
   3326   case MVT::f80:
   3327     // No f80 support yet.
   3328     return 0;
   3329   }
   3330 
   3331   // MachineConstantPool wants an explicit alignment.
   3332   unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
   3333   if (Align == 0) {
   3334     // Alignment of vector types. FIXME!
   3335     Align = DL.getTypeAllocSize(CFP->getType());
   3336   }
   3337 
   3338   // x86-32 PIC requires a PIC base register for constant pools.
   3339   unsigned PICBase = 0;
   3340   unsigned char OpFlag = 0;
   3341   if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
   3342     OpFlag = X86II::MO_PIC_BASE_OFFSET;
   3343     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
   3344   } else if (Subtarget->isPICStyleGOT()) {
   3345     OpFlag = X86II::MO_GOTOFF;
   3346     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
   3347   } else if (Subtarget->isPICStyleRIPRel() &&
   3348              TM.getCodeModel() == CodeModel::Small) {
   3349     PICBase = X86::RIP;
   3350   }
   3351 
   3352   // Create the load from the constant pool.
   3353   unsigned CPI = MCP.getConstantPoolIndex(CFP, Align);
   3354   unsigned ResultReg = createResultReg(RC);
   3355 
   3356   if (CM == CodeModel::Large) {
   3357     unsigned AddrReg = createResultReg(&X86::GR64RegClass);
   3358     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
   3359             AddrReg)
   3360       .addConstantPoolIndex(CPI, 0, OpFlag);
   3361     MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3362                                       TII.get(Opc), ResultReg);
   3363     addDirectMem(MIB, AddrReg);
   3364     MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
   3365         MachinePointerInfo::getConstantPool(), MachineMemOperand::MOLoad,
   3366         TM.getDataLayout()->getPointerSize(), Align);
   3367     MIB->addMemOperand(*FuncInfo.MF, MMO);
   3368     return ResultReg;
   3369   }
   3370 
   3371   addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3372                                    TII.get(Opc), ResultReg),
   3373                            CPI, PICBase, OpFlag);
   3374   return ResultReg;
   3375 }
   3376 
   3377 unsigned X86FastISel::X86MaterializeGV(const GlobalValue *GV, MVT VT) {
   3378   // Can't handle alternate code models yet.
   3379   if (TM.getCodeModel() != CodeModel::Small)
   3380     return 0;
   3381 
   3382   // Materialize addresses with LEA/MOV instructions.
   3383   X86AddressMode AM;
   3384   if (X86SelectAddress(GV, AM)) {
   3385     // If the expression is just a basereg, then we're done, otherwise we need
   3386     // to emit an LEA.
   3387     if (AM.BaseType == X86AddressMode::RegBase &&
   3388         AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr)
   3389       return AM.Base.Reg;
   3390 
   3391     unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
   3392     if (TM.getRelocationModel() == Reloc::Static &&
   3393         TLI.getPointerTy() == MVT::i64) {
    3394       // The displacement could be more than 32 bits away, so we need to use an
    3395       // instruction with a 64-bit immediate.
   3396       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV64ri),
   3397               ResultReg)
   3398         .addGlobalAddress(GV);
   3399     } else {
   3400       unsigned Opc = TLI.getPointerTy() == MVT::i32
   3401                      ? (Subtarget->isTarget64BitILP32()
   3402                         ? X86::LEA64_32r : X86::LEA32r)
   3403                      : X86::LEA64r;
   3404       addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3405                              TII.get(Opc), ResultReg), AM);
   3406     }
   3407     return ResultReg;
   3408   }
   3409   return 0;
   3410 }
   3411 
   3412 unsigned X86FastISel::fastMaterializeConstant(const Constant *C) {
   3413   EVT CEVT = TLI.getValueType(C->getType(), true);
   3414 
   3415   // Only handle simple types.
   3416   if (!CEVT.isSimple())
   3417     return 0;
   3418   MVT VT = CEVT.getSimpleVT();
   3419 
   3420   if (const auto *CI = dyn_cast<ConstantInt>(C))
   3421     return X86MaterializeInt(CI, VT);
   3422   else if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
   3423     return X86MaterializeFP(CFP, VT);
   3424   else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
   3425     return X86MaterializeGV(GV, VT);
   3426 
   3427   return 0;
   3428 }
   3429 
   3430 unsigned X86FastISel::fastMaterializeAlloca(const AllocaInst *C) {
   3431   // Fail on dynamic allocas. At this point, getRegForValue has already
   3432   // checked its CSE maps, so if we're here trying to handle a dynamic
   3433   // alloca, we're not going to succeed. X86SelectAddress has a
   3434   // check for dynamic allocas, because it's called directly from
    3435   // various places, but fastMaterializeAlloca also needs a check
    3436   // in order to avoid recursion between getRegForValue,
    3437   // X86SelectAddress, and fastMaterializeAlloca.
   3438   if (!FuncInfo.StaticAllocaMap.count(C))
   3439     return 0;
   3440   assert(C->isStaticAlloca() && "dynamic alloca in the static alloca map?");
   3441 
   3442   X86AddressMode AM;
   3443   if (!X86SelectAddress(C, AM))
   3444     return 0;
   3445   unsigned Opc = TLI.getPointerTy() == MVT::i32
   3446                  ? (Subtarget->isTarget64BitILP32()
   3447                     ? X86::LEA64_32r : X86::LEA32r)
   3448                  : X86::LEA64r;
   3449   const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
   3450   unsigned ResultReg = createResultReg(RC);
   3451   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
   3452                          TII.get(Opc), ResultReg), AM);
   3453   return ResultReg;
   3454 }
   3455 
   3456 unsigned X86FastISel::fastMaterializeFloatZero(const ConstantFP *CF) {
   3457   MVT VT;
   3458   if (!isTypeLegal(CF->getType(), VT))
   3459     return 0;
   3460 
   3461   // Get opcode and regclass for the given zero.
   3462   unsigned Opc = 0;
   3463   const TargetRegisterClass *RC = nullptr;
   3464   switch (VT.SimpleTy) {
   3465   default: return 0;
   3466   case MVT::f32:
   3467     if (X86ScalarSSEf32) {
   3468       Opc = X86::FsFLD0SS;
   3469       RC  = &X86::FR32RegClass;
   3470     } else {
   3471       Opc = X86::LD_Fp032;
   3472       RC  = &X86::RFP32RegClass;
   3473     }
   3474     break;
   3475   case MVT::f64:
   3476     if (X86ScalarSSEf64) {
   3477       Opc = X86::FsFLD0SD;
   3478       RC  = &X86::FR64RegClass;
   3479     } else {
   3480       Opc = X86::LD_Fp064;
   3481       RC  = &X86::RFP64RegClass;
   3482     }
   3483     break;
   3484   case MVT::f80:
   3485     // No f80 support yet.
   3486     return 0;
   3487   }
   3488 
   3489   unsigned ResultReg = createResultReg(RC);
   3490   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg);
   3491   return ResultReg;
   3492 }
   3493 
   3494 
   3495 bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
   3496                                       const LoadInst *LI) {
   3497   const Value *Ptr = LI->getPointerOperand();
   3498   X86AddressMode AM;
   3499   if (!X86SelectAddress(Ptr, AM))
   3500     return false;
   3501 
   3502   const X86InstrInfo &XII = (const X86InstrInfo &)TII;
   3503 
   3504   unsigned Size = DL.getTypeAllocSize(LI->getType());
   3505   unsigned Alignment = LI->getAlignment();
   3506 
   3507   if (Alignment == 0)  // Ensure that codegen never sees alignment 0
   3508     Alignment = DL.getABITypeAlignment(LI->getType());
   3509 
   3510   SmallVector<MachineOperand, 8> AddrOps;
   3511   AM.getFullAddress(AddrOps);
   3512 
   3513   MachineInstr *Result =
   3514     XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps,
   3515                               Size, Alignment, /*AllowCommute=*/true);
   3516   if (!Result)
   3517     return false;
   3518 
   3519   Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI));
   3520   FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
   3521   MI->eraseFromParent();
   3522   return true;
   3523 }
   3524 
   3525 
   3526 namespace llvm {
   3527   FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
   3528                                 const TargetLibraryInfo *libInfo) {
   3529     return new X86FastISel(funcInfo, libInfo);
   3530   }
   3531 }
   3532