      1 //===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the X86-specific support for the FastISel class. Much
     11 // of the target-specific code is generated by tablegen in the file
     12 // X86GenFastISel.inc, which is #included here.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "X86.h"
     17 #include "X86ISelLowering.h"
     18 #include "X86InstrBuilder.h"
     19 #include "X86RegisterInfo.h"
     20 #include "X86Subtarget.h"
     21 #include "X86TargetMachine.h"
     22 #include "llvm/CodeGen/Analysis.h"
     23 #include "llvm/CodeGen/FastISel.h"
     24 #include "llvm/CodeGen/FunctionLoweringInfo.h"
     25 #include "llvm/CodeGen/MachineConstantPool.h"
     26 #include "llvm/CodeGen/MachineFrameInfo.h"
     27 #include "llvm/CodeGen/MachineRegisterInfo.h"
     28 #include "llvm/IR/CallingConv.h"
     29 #include "llvm/IR/DerivedTypes.h"
     30 #include "llvm/IR/GlobalAlias.h"
     31 #include "llvm/IR/GlobalVariable.h"
     32 #include "llvm/IR/Instructions.h"
     33 #include "llvm/IR/IntrinsicInst.h"
     34 #include "llvm/IR/Operator.h"
     35 #include "llvm/Support/CallSite.h"
     36 #include "llvm/Support/ErrorHandling.h"
     37 #include "llvm/Support/GetElementPtrTypeIterator.h"
     38 #include "llvm/Target/TargetOptions.h"
     39 using namespace llvm;
     40 
     41 namespace {
     42 
     43 class X86FastISel : public FastISel {
     44   /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
     45   /// make the right decision when generating code for different targets.
     46   const X86Subtarget *Subtarget;
     47 
     48   /// RegInfo - X86 register info.
     49   ///
     50   const X86RegisterInfo *RegInfo;
     51 
     52   /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
     53   /// floating point ops.
     54   /// When SSE is available, use it for f32 operations.
     55   /// When SSE2 is available, use it for f64 operations.
     56   bool X86ScalarSSEf64;
     57   bool X86ScalarSSEf32;
     58 
     59 public:
     60   explicit X86FastISel(FunctionLoweringInfo &funcInfo,
     61                        const TargetLibraryInfo *libInfo)
     62     : FastISel(funcInfo, libInfo) {
     63     Subtarget = &TM.getSubtarget<X86Subtarget>();
     64     X86ScalarSSEf64 = Subtarget->hasSSE2();
     65     X86ScalarSSEf32 = Subtarget->hasSSE1();
     66     RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
     67   }
     68 
     69   virtual bool TargetSelectInstruction(const Instruction *I);
     70 
     71   /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
     72   /// vreg is being provided by the specified load instruction.  If possible,
      73   /// try to fold the load as an operand to the instruction, returning true
      74   /// on success.
     75   virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
     76                              const LoadInst *LI);
     77 
     78   virtual bool FastLowerArguments();
     79 
     80 #include "X86GenFastISel.inc"
     81 
     82 private:
     83   bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
     84 
     85   bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
     86 
     87   bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
     88   bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
     89 
     90   bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
     91                          unsigned &ResultReg);
     92 
     93   bool X86SelectAddress(const Value *V, X86AddressMode &AM);
     94   bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
     95 
     96   bool X86SelectLoad(const Instruction *I);
     97 
     98   bool X86SelectStore(const Instruction *I);
     99 
    100   bool X86SelectRet(const Instruction *I);
    101 
    102   bool X86SelectCmp(const Instruction *I);
    103 
    104   bool X86SelectZExt(const Instruction *I);
    105 
    106   bool X86SelectBranch(const Instruction *I);
    107 
    108   bool X86SelectShift(const Instruction *I);
    109 
    110   bool X86SelectSelect(const Instruction *I);
    111 
    112   bool X86SelectTrunc(const Instruction *I);
    113 
    114   bool X86SelectFPExt(const Instruction *I);
    115   bool X86SelectFPTrunc(const Instruction *I);
    116 
    117   bool X86VisitIntrinsicCall(const IntrinsicInst &I);
    118   bool X86SelectCall(const Instruction *I);
    119 
    120   bool DoSelectCall(const Instruction *I, const char *MemIntName);
    121 
    122   const X86InstrInfo *getInstrInfo() const {
    123     return getTargetMachine()->getInstrInfo();
    124   }
    125   const X86TargetMachine *getTargetMachine() const {
    126     return static_cast<const X86TargetMachine *>(&TM);
    127   }
    128 
    129   unsigned TargetMaterializeConstant(const Constant *C);
    130 
    131   unsigned TargetMaterializeAlloca(const AllocaInst *C);
    132 
    133   unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
    134 
    135   /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
    136   /// computed in an SSE register, not on the X87 floating point stack.
    137   bool isScalarFPTypeInSSEReg(EVT VT) const {
     138     return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
     139       (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
    140   }
    141 
    142   bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);
    143 
    144   bool IsMemcpySmall(uint64_t Len);
    145 
    146   bool TryEmitSmallMemcpy(X86AddressMode DestAM,
    147                           X86AddressMode SrcAM, uint64_t Len);
    148 };
    149 
    150 } // end anonymous namespace.
    151 
    152 bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
    153   EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
    154   if (evt == MVT::Other || !evt.isSimple())
    155     // Unhandled type. Halt "fast" selection and bail.
    156     return false;
    157 
    158   VT = evt.getSimpleVT();
    159   // For now, require SSE/SSE2 for performing floating-point operations,
    160   // since x87 requires additional work.
    161   if (VT == MVT::f64 && !X86ScalarSSEf64)
    162     return false;
    163   if (VT == MVT::f32 && !X86ScalarSSEf32)
    164     return false;
    165   // Similarly, no f80 support yet.
    166   if (VT == MVT::f80)
    167     return false;
    168   // We only handle legal types. For example, on x86-32 the instruction
    169   // selector contains all of the 64-bit instructions from x86-64,
    170   // under the assumption that i64 won't be used if the target doesn't
    171   // support it.
    172   return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
    173 }
    174 
    175 #include "X86GenCallingConv.inc"
    176 
    177 /// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
     178 /// The address is described by AM, either a pre-computed base pointer or a
     179 /// GlobalAddress. Return true and the result register by reference if possible.
    180 bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
    181                                   unsigned &ResultReg) {
    182   // Get opcode and regclass of the output for the given load instruction.
    183   unsigned Opc = 0;
    184   const TargetRegisterClass *RC = NULL;
    185   switch (VT.getSimpleVT().SimpleTy) {
    186   default: return false;
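           // i1 is loaded as a plain byte (MOV8rm); the store path masks the value
           // down to bit 0 before writing it.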
    187   case MVT::i1:
    188   case MVT::i8:
    189     Opc = X86::MOV8rm;
    190     RC  = &X86::GR8RegClass;
    191     break;
    192   case MVT::i16:
    193     Opc = X86::MOV16rm;
    194     RC  = &X86::GR16RegClass;
    195     break;
    196   case MVT::i32:
    197     Opc = X86::MOV32rm;
    198     RC  = &X86::GR32RegClass;
    199     break;
    200   case MVT::i64:
    201     // Must be in x86-64 mode.
    202     Opc = X86::MOV64rm;
    203     RC  = &X86::GR64RegClass;
    204     break;
    205   case MVT::f32:
    206     if (X86ScalarSSEf32) {
    207       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
    208       RC  = &X86::FR32RegClass;
    209     } else {
    210       Opc = X86::LD_Fp32m;
    211       RC  = &X86::RFP32RegClass;
    212     }
    213     break;
    214   case MVT::f64:
    215     if (X86ScalarSSEf64) {
    216       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
    217       RC  = &X86::FR64RegClass;
    218     } else {
    219       Opc = X86::LD_Fp64m;
    220       RC  = &X86::RFP64RegClass;
    221     }
    222     break;
    223   case MVT::f80:
    224     // No f80 support yet.
    225     return false;
    226   }
    227 
    228   ResultReg = createResultReg(RC);
    229   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
    230                          DL, TII.get(Opc), ResultReg), AM);
    231   return true;
    232 }
    233 
     234 /// X86FastEmitStore - Emit a machine instruction to store a value Val of
     235 /// type VT. The address is described by AM, either a pre-computed base pointer
     236 /// plus a displacement offset or a GlobalAddress.
     237 /// Return true if it is possible.
    238 bool
    239 X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
     240   // Get the opcode for the given store instruction.
    241   unsigned Opc = 0;
    242   switch (VT.getSimpleVT().SimpleTy) {
    243   case MVT::f80: // No f80 support yet.
    244   default: return false;
    245   case MVT::i1: {
    246     // Mask out all but lowest bit.
    247     unsigned AndResult = createResultReg(&X86::GR8RegClass);
    248     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
    249             TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    250     Val = AndResult;
    251   }
    252   // FALLTHROUGH, handling i1 as i8.
    253   case MVT::i8:  Opc = X86::MOV8mr;  break;
    254   case MVT::i16: Opc = X86::MOV16mr; break;
    255   case MVT::i32: Opc = X86::MOV32mr; break;
    256   case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
    257   case MVT::f32:
    258     Opc = X86ScalarSSEf32 ?
    259           (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
    260     break;
    261   case MVT::f64:
    262     Opc = X86ScalarSSEf64 ?
    263           (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
    264     break;
    265   case MVT::v4f32:
    266     Opc = X86::MOVAPSmr;
    267     break;
    268   case MVT::v2f64:
    269     Opc = X86::MOVAPDmr;
    270     break;
    271   case MVT::v4i32:
    272   case MVT::v2i64:
    273   case MVT::v8i16:
    274   case MVT::v16i8:
    275     Opc = X86::MOVDQAmr;
    276     break;
    277   }
    278 
    279   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
    280                          DL, TII.get(Opc)), AM).addReg(Val);
    281   return true;
    282 }
    283 
    284 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
    285                                    const X86AddressMode &AM) {
    286   // Handle 'null' like i32/i64 0.
    287   if (isa<ConstantPointerNull>(Val))
    288     Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
    289 
    290   // If this is a store of a simple constant, fold the constant into the store.
    291   if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    292     unsigned Opc = 0;
    293     bool Signed = true;
    294     switch (VT.getSimpleVT().SimpleTy) {
    295     default: break;
    296     case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
    297     case MVT::i8:  Opc = X86::MOV8mi;  break;
    298     case MVT::i16: Opc = X86::MOV16mi; break;
    299     case MVT::i32: Opc = X86::MOV32mi; break;
    300     case MVT::i64:
    301       // Must be a 32-bit sign extended value.
    302       if (isInt<32>(CI->getSExtValue()))
    303         Opc = X86::MOV64mi32;
    304       break;
    305     }
    306 
    307     if (Opc) {
    308       addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
    309                              DL, TII.get(Opc)), AM)
    310                              .addImm(Signed ? (uint64_t) CI->getSExtValue() :
    311                                               CI->getZExtValue());
    312       return true;
    313     }
    314   }
    315 
    316   unsigned ValReg = getRegForValue(Val);
    317   if (ValReg == 0)
    318     return false;
    319 
    320   return X86FastEmitStore(VT, ValReg, AM);
    321 }
    322 
    323 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
    324 /// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
    325 /// ISD::SIGN_EXTEND).
    326 bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
    327                                     unsigned Src, EVT SrcVT,
    328                                     unsigned &ResultReg) {
    329   unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
    330                            Src, /*TODO: Kill=*/false);
    331   if (RR == 0)
    332     return false;
    333 
    334   ResultReg = RR;
    335   return true;
    336 }
    337 
    338 /// X86SelectAddress - Attempt to fill in an address from the given value.
    339 ///
    340 bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
    341   const User *U = NULL;
    342   unsigned Opcode = Instruction::UserOp1;
    343   if (const Instruction *I = dyn_cast<Instruction>(V)) {
    344     // Don't walk into other basic blocks; it's possible we haven't
    345     // visited them yet, so the instructions may not yet be assigned
    346     // virtual registers.
    347     if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
    348         FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
    349       Opcode = I->getOpcode();
    350       U = I;
    351     }
    352   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    353     Opcode = C->getOpcode();
    354     U = C;
    355   }
    356 
    357   if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    358     if (Ty->getAddressSpace() > 255)
    359       // Fast instruction selection doesn't support the special
    360       // address spaces.
    361       return false;
    362 
    363   switch (Opcode) {
    364   default: break;
    365   case Instruction::BitCast:
    366     // Look past bitcasts.
    367     return X86SelectAddress(U->getOperand(0), AM);
    368 
    369   case Instruction::IntToPtr:
    370     // Look past no-op inttoptrs.
    371     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
    372       return X86SelectAddress(U->getOperand(0), AM);
    373     break;
    374 
    375   case Instruction::PtrToInt:
    376     // Look past no-op ptrtoints.
    377     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
    378       return X86SelectAddress(U->getOperand(0), AM);
    379     break;
    380 
    381   case Instruction::Alloca: {
    382     // Do static allocas.
    383     const AllocaInst *A = cast<AllocaInst>(V);
    384     DenseMap<const AllocaInst*, int>::iterator SI =
    385       FuncInfo.StaticAllocaMap.find(A);
    386     if (SI != FuncInfo.StaticAllocaMap.end()) {
    387       AM.BaseType = X86AddressMode::FrameIndexBase;
    388       AM.Base.FrameIndex = SI->second;
    389       return true;
    390     }
    391     break;
    392   }
    393 
    394   case Instruction::Add: {
    395     // Adds of constants are common and easy enough.
    396     if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
    397       uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
    398       // They have to fit in the 32-bit signed displacement field though.
    399       if (isInt<32>(Disp)) {
    400         AM.Disp = (uint32_t)Disp;
    401         return X86SelectAddress(U->getOperand(0), AM);
    402       }
    403     }
    404     break;
    405   }
    406 
    407   case Instruction::GetElementPtr: {
    408     X86AddressMode SavedAM = AM;
    409 
    410     // Pattern-match simple GEPs.
    411     uint64_t Disp = (int32_t)AM.Disp;
    412     unsigned IndexReg = AM.IndexReg;
    413     unsigned Scale = AM.Scale;
    414     gep_type_iterator GTI = gep_type_begin(U);
    415     // Iterate through the indices, folding what we can. Constants can be
    416     // folded, and one dynamic index can be handled, if the scale is supported.
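             // For example, a GEP over an i32 array with a single non-constant index
             // folds to Scale = 4 with that index in IndexReg, while constant indices
             // are accumulated into Disp.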
    417     for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
    418          i != e; ++i, ++GTI) {
    419       const Value *Op = *i;
    420       if (StructType *STy = dyn_cast<StructType>(*GTI)) {
    421         const StructLayout *SL = TD.getStructLayout(STy);
    422         Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
    423         continue;
    424       }
    425 
     426       // An array/variable index is always of the form i*S where S is the
    427       // constant scale size.  See if we can push the scale into immediates.
    428       uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
    429       for (;;) {
    430         if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
    431           // Constant-offset addressing.
    432           Disp += CI->getSExtValue() * S;
    433           break;
    434         }
    435         if (isa<AddOperator>(Op) &&
    436             (!isa<Instruction>(Op) ||
    437              FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
    438                == FuncInfo.MBB) &&
    439             isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
    440           // An add (in the same block) with a constant operand. Fold the
    441           // constant.
    442           ConstantInt *CI =
    443             cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
    444           Disp += CI->getSExtValue() * S;
    445           // Iterate on the other operand.
    446           Op = cast<AddOperator>(Op)->getOperand(0);
    447           continue;
    448         }
    449         if (IndexReg == 0 &&
    450             (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
    451             (S == 1 || S == 2 || S == 4 || S == 8)) {
    452           // Scaled-index addressing.
    453           Scale = S;
    454           IndexReg = getRegForGEPIndex(Op).first;
    455           if (IndexReg == 0)
    456             return false;
    457           break;
    458         }
    459         // Unsupported.
    460         goto unsupported_gep;
    461       }
    462     }
    463     // Check for displacement overflow.
    464     if (!isInt<32>(Disp))
    465       break;
    466     // Ok, the GEP indices were covered by constant-offset and scaled-index
    467     // addressing. Update the address state and move on to examining the base.
    468     AM.IndexReg = IndexReg;
    469     AM.Scale = Scale;
    470     AM.Disp = (uint32_t)Disp;
    471     if (X86SelectAddress(U->getOperand(0), AM))
    472       return true;
    473 
     474     // If we couldn't merge the gep value into this addr mode, revert to our
     475     // address and just match the value instead of completely failing.
    476     AM = SavedAM;
    477     break;
    478   unsupported_gep:
    479     // Ok, the GEP indices weren't all covered.
    480     break;
    481   }
    482   }
    483 
    484   // Handle constant address.
    485   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    486     // Can't handle alternate code models yet.
    487     if (TM.getCodeModel() != CodeModel::Small)
    488       return false;
    489 
    490     // Can't handle TLS yet.
    491     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
    492       if (GVar->isThreadLocal())
    493         return false;
    494 
    495     // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
    496     // it works...).
    497     if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
    498       if (const GlobalVariable *GVar =
    499             dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
    500         if (GVar->isThreadLocal())
    501           return false;
    502 
    503     // RIP-relative addresses can't have additional register operands, so if
    504     // we've already folded stuff into the addressing mode, just force the
    505     // global value into its own register, which we can use as the basereg.
    506     if (!Subtarget->isPICStyleRIPRel() ||
    507         (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
    508       // Okay, we've committed to selecting this global. Set up the address.
    509       AM.GV = GV;
    510 
    511       // Allow the subtarget to classify the global.
    512       unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
    513 
    514       // If this reference is relative to the pic base, set it now.
    515       if (isGlobalRelativeToPICBase(GVFlags)) {
    516         // FIXME: How do we know Base.Reg is free??
    517         AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    518       }
    519 
    520       // Unless the ABI requires an extra load, return a direct reference to
    521       // the global.
    522       if (!isGlobalStubReference(GVFlags)) {
    523         if (Subtarget->isPICStyleRIPRel()) {
    524           // Use rip-relative addressing if we can.  Above we verified that the
    525           // base and index registers are unused.
    526           assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    527           AM.Base.Reg = X86::RIP;
    528         }
    529         AM.GVOpFlags = GVFlags;
    530         return true;
    531       }
    532 
    533       // Ok, we need to do a load from a stub.  If we've already loaded from
    534       // this stub, reuse the loaded pointer, otherwise emit the load now.
    535       DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
    536       unsigned LoadReg;
    537       if (I != LocalValueMap.end() && I->second != 0) {
    538         LoadReg = I->second;
    539       } else {
    540         // Issue load from stub.
    541         unsigned Opc = 0;
    542         const TargetRegisterClass *RC = NULL;
    543         X86AddressMode StubAM;
    544         StubAM.Base.Reg = AM.Base.Reg;
    545         StubAM.GV = GV;
    546         StubAM.GVOpFlags = GVFlags;
    547 
    548         // Prepare for inserting code in the local-value area.
    549         SavePoint SaveInsertPt = enterLocalValueArea();
    550 
    551         if (TLI.getPointerTy() == MVT::i64) {
    552           Opc = X86::MOV64rm;
    553           RC  = &X86::GR64RegClass;
    554 
    555           if (Subtarget->isPICStyleRIPRel())
    556             StubAM.Base.Reg = X86::RIP;
    557         } else {
    558           Opc = X86::MOV32rm;
    559           RC  = &X86::GR32RegClass;
    560         }
    561 
    562         LoadReg = createResultReg(RC);
    563         MachineInstrBuilder LoadMI =
    564           BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
    565         addFullAddress(LoadMI, StubAM);
    566 
    567         // Ok, back to normal mode.
    568         leaveLocalValueArea(SaveInsertPt);
    569 
    570         // Prevent loading GV stub multiple times in same MBB.
    571         LocalValueMap[V] = LoadReg;
    572       }
    573 
    574       // Now construct the final address. Note that the Disp, Scale,
    575       // and Index values may already be set here.
    576       AM.Base.Reg = LoadReg;
    577       AM.GV = 0;
    578       return true;
    579     }
    580   }
    581 
    582   // If all else fails, try to materialize the value in a register.
    583   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    584     if (AM.Base.Reg == 0) {
    585       AM.Base.Reg = getRegForValue(V);
    586       return AM.Base.Reg != 0;
    587     }
    588     if (AM.IndexReg == 0) {
    589       assert(AM.Scale == 1 && "Scale with no index!");
    590       AM.IndexReg = getRegForValue(V);
    591       return AM.IndexReg != 0;
    592     }
    593   }
    594 
    595   return false;
    596 }
    597 
    598 /// X86SelectCallAddress - Attempt to fill in an address from the given value.
    599 ///
    600 bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
    601   const User *U = NULL;
    602   unsigned Opcode = Instruction::UserOp1;
    603   if (const Instruction *I = dyn_cast<Instruction>(V)) {
    604     Opcode = I->getOpcode();
    605     U = I;
    606   } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    607     Opcode = C->getOpcode();
    608     U = C;
    609   }
    610 
    611   switch (Opcode) {
    612   default: break;
    613   case Instruction::BitCast:
    614     // Look past bitcasts.
    615     return X86SelectCallAddress(U->getOperand(0), AM);
    616 
    617   case Instruction::IntToPtr:
    618     // Look past no-op inttoptrs.
    619     if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
    620       return X86SelectCallAddress(U->getOperand(0), AM);
    621     break;
    622 
    623   case Instruction::PtrToInt:
    624     // Look past no-op ptrtoints.
    625     if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
    626       return X86SelectCallAddress(U->getOperand(0), AM);
    627     break;
    628   }
    629 
    630   // Handle constant address.
    631   if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    632     // Can't handle alternate code models yet.
    633     if (TM.getCodeModel() != CodeModel::Small)
    634       return false;
    635 
    636     // RIP-relative addresses can't have additional register operands.
    637     if (Subtarget->isPICStyleRIPRel() &&
    638         (AM.Base.Reg != 0 || AM.IndexReg != 0))
    639       return false;
    640 
    641     // Can't handle DLLImport.
    642     if (GV->hasDLLImportLinkage())
    643       return false;
    644 
    645     // Can't handle TLS.
    646     if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
    647       if (GVar->isThreadLocal())
    648         return false;
    649 
    650     // Okay, we've committed to selecting this global. Set up the basic address.
    651     AM.GV = GV;
    652 
    653     // No ABI requires an extra load for anything other than DLLImport, which
    654     // we rejected above. Return a direct reference to the global.
    655     if (Subtarget->isPICStyleRIPRel()) {
    656       // Use rip-relative addressing if we can.  Above we verified that the
    657       // base and index registers are unused.
    658       assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    659       AM.Base.Reg = X86::RIP;
    660     } else if (Subtarget->isPICStyleStubPIC()) {
    661       AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    662     } else if (Subtarget->isPICStyleGOT()) {
    663       AM.GVOpFlags = X86II::MO_GOTOFF;
    664     }
    665 
    666     return true;
    667   }
    668 
    669   // If all else fails, try to materialize the value in a register.
    670   if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    671     if (AM.Base.Reg == 0) {
    672       AM.Base.Reg = getRegForValue(V);
    673       return AM.Base.Reg != 0;
    674     }
    675     if (AM.IndexReg == 0) {
    676       assert(AM.Scale == 1 && "Scale with no index!");
    677       AM.IndexReg = getRegForValue(V);
    678       return AM.IndexReg != 0;
    679     }
    680   }
    681 
    682   return false;
    683 }
    684 
    685 
    686 /// X86SelectStore - Select and emit code to implement store instructions.
    687 bool X86FastISel::X86SelectStore(const Instruction *I) {
    688   // Atomic stores need special handling.
    689   const StoreInst *S = cast<StoreInst>(I);
    690 
    691   if (S->isAtomic())
    692     return false;
    693 
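           // Under-aligned stores (explicit alignment below the type's ABI alignment)
           // are rejected here so SelectionDAG handles them.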
    694   unsigned SABIAlignment =
    695     TD.getABITypeAlignment(S->getValueOperand()->getType());
    696   if (S->getAlignment() != 0 && S->getAlignment() < SABIAlignment)
    697     return false;
    698 
    699   MVT VT;
    700   if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
    701     return false;
    702 
    703   X86AddressMode AM;
    704   if (!X86SelectAddress(I->getOperand(1), AM))
    705     return false;
    706 
    707   return X86FastEmitStore(VT, I->getOperand(0), AM);
    708 }
    709 
    710 /// X86SelectRet - Select and emit code to implement ret instructions.
    711 bool X86FastISel::X86SelectRet(const Instruction *I) {
    712   const ReturnInst *Ret = cast<ReturnInst>(I);
    713   const Function &F = *I->getParent()->getParent();
    714   const X86MachineFunctionInfo *X86MFInfo =
    715       FuncInfo.MF->getInfo<X86MachineFunctionInfo>();
    716 
    717   if (!FuncInfo.CanLowerReturn)
    718     return false;
    719 
    720   CallingConv::ID CC = F.getCallingConv();
    721   if (CC != CallingConv::C &&
    722       CC != CallingConv::Fast &&
    723       CC != CallingConv::X86_FastCall)
    724     return false;
    725 
    726   if (Subtarget->isTargetWin64())
    727     return false;
    728 
    729   // Don't handle popping bytes on return for now.
    730   if (X86MFInfo->getBytesToPopOnReturn() != 0)
    731     return false;
    732 
    733   // fastcc with -tailcallopt is intended to provide a guaranteed
    734   // tail call optimization. Fastisel doesn't know how to do that.
    735   if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    736     return false;
    737 
    738   // Let SDISel handle vararg functions.
    739   if (F.isVarArg())
    740     return false;
    741 
    742   // Build a list of return value registers.
    743   SmallVector<unsigned, 4> RetRegs;
    744 
    745   if (Ret->getNumOperands() > 0) {
    746     SmallVector<ISD::OutputArg, 4> Outs;
    747     GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);
    748 
     749     // Analyze the return values, assigning a location to each.
    750     SmallVector<CCValAssign, 16> ValLocs;
    751     CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
    752                    I->getContext());
    753     CCInfo.AnalyzeReturn(Outs, RetCC_X86);
    754 
    755     const Value *RV = Ret->getOperand(0);
    756     unsigned Reg = getRegForValue(RV);
    757     if (Reg == 0)
    758       return false;
    759 
    760     // Only handle a single return value for now.
    761     if (ValLocs.size() != 1)
    762       return false;
    763 
    764     CCValAssign &VA = ValLocs[0];
    765 
    766     // Don't bother handling odd stuff for now.
    767     if (VA.getLocInfo() != CCValAssign::Full)
    768       return false;
    769     // Only handle register returns for now.
    770     if (!VA.isRegLoc())
    771       return false;
    772 
    773     // The calling-convention tables for x87 returns don't tell
    774     // the whole story.
    775     if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
    776       return false;
    777 
    778     unsigned SrcReg = Reg + VA.getValNo();
    779     EVT SrcVT = TLI.getValueType(RV->getType());
    780     EVT DstVT = VA.getValVT();
    781     // Special handling for extended integers.
    782     if (SrcVT != DstVT) {
    783       if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
    784         return false;
    785 
    786       if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
    787         return false;
    788 
    789       assert(DstVT == MVT::i32 && "X86 should always ext to i32");
    790 
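               // An i1 return value is first zero-extended to i8 (sign-extended i1
               // returns are rejected), then widened to i32 below.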
    791       if (SrcVT == MVT::i1) {
    792         if (Outs[0].Flags.isSExt())
    793           return false;
    794         SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
    795         SrcVT = MVT::i8;
    796       }
    797       unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
    798                                              ISD::SIGN_EXTEND;
    799       SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
    800                           SrcReg, /*TODO: Kill=*/false);
    801     }
    802 
    803     // Make the copy.
    804     unsigned DstReg = VA.getLocReg();
    805     const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    806     // Avoid a cross-class copy. This is very unlikely.
    807     if (!SrcRC->contains(DstReg))
    808       return false;
    809     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
    810             DstReg).addReg(SrcReg);
    811 
    812     // Add register to return instruction.
    813     RetRegs.push_back(VA.getLocReg());
    814   }
    815 
    816   // The x86-64 ABI for returning structs by value requires that we copy
    817   // the sret argument into %rax for the return. We saved the argument into
    818   // a virtual register in the entry block, so now we copy the value out
    819   // and into %rax.
    820   if (Subtarget->is64Bit() && F.hasStructRetAttr()) {
    821     unsigned Reg = X86MFInfo->getSRetReturnReg();
    822     assert(Reg &&
    823            "SRetReturnReg should have been set in LowerFormalArguments()!");
    824     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
    825             X86::RAX).addReg(Reg);
    826     RetRegs.push_back(X86::RAX);
    827   }
    828 
    829   // Now emit the RET.
    830   MachineInstrBuilder MIB =
    831     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
    832   for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    833     MIB.addReg(RetRegs[i], RegState::Implicit);
    834   return true;
    835 }
    836 
    837 /// X86SelectLoad - Select and emit code to implement load instructions.
    838 ///
    839 bool X86FastISel::X86SelectLoad(const Instruction *I)  {
    840   // Atomic loads need special handling.
    841   if (cast<LoadInst>(I)->isAtomic())
    842     return false;
    843 
    844   MVT VT;
    845   if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
    846     return false;
    847 
    848   X86AddressMode AM;
    849   if (!X86SelectAddress(I->getOperand(0), AM))
    850     return false;
    851 
    852   unsigned ResultReg = 0;
    853   if (X86FastEmitLoad(VT, AM, ResultReg)) {
    854     UpdateValueMap(I, ResultReg);
    855     return true;
    856   }
    857   return false;
    858 }
    859 
    860 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
    861   bool HasAVX = Subtarget->hasAVX();
    862   bool X86ScalarSSEf32 = Subtarget->hasSSE1();
    863   bool X86ScalarSSEf64 = Subtarget->hasSSE2();
    864 
    865   switch (VT.getSimpleVT().SimpleTy) {
    866   default:       return 0;
    867   case MVT::i8:  return X86::CMP8rr;
    868   case MVT::i16: return X86::CMP16rr;
    869   case MVT::i32: return X86::CMP32rr;
    870   case MVT::i64: return X86::CMP64rr;
    871   case MVT::f32:
    872     return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
    873   case MVT::f64:
    874     return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
    875   }
    876 }
    877 
     878 /// X86ChooseCmpImmediateOpcode - If we have a comparison whose RHS is the
     879 /// constant RHSC, return an opcode that can fold the immediate into the
     880 /// compare (e.g. CMP32ri); otherwise return 0.
    881 static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
    882   switch (VT.getSimpleVT().SimpleTy) {
    883   // Otherwise, we can't fold the immediate into this comparison.
    884   default: return 0;
    885   case MVT::i8: return X86::CMP8ri;
    886   case MVT::i16: return X86::CMP16ri;
    887   case MVT::i32: return X86::CMP32ri;
    888   case MVT::i64:
    889     // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    890     // field.
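             // (i.e. truncating to 32 bits and sign-extending back preserves the value)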
    891     if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
    892       return X86::CMP64ri32;
    893     return 0;
    894   }
    895 }
    896 
    897 bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
    898                                      EVT VT) {
    899   unsigned Op0Reg = getRegForValue(Op0);
    900   if (Op0Reg == 0) return false;
    901 
    902   // Handle 'null' like i32/i64 0.
    903   if (isa<ConstantPointerNull>(Op1))
    904     Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
    905 
    906   // We have two options: compare with register or immediate.  If the RHS of
    907   // the compare is an immediate that we can fold into this compare, use
    908   // CMPri, otherwise use CMPrr.
    909   if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    910     if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
    911       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
    912         .addReg(Op0Reg)
    913         .addImm(Op1C->getSExtValue());
    914       return true;
    915     }
    916   }
    917 
    918   unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
    919   if (CompareOpc == 0) return false;
    920 
    921   unsigned Op1Reg = getRegForValue(Op1);
    922   if (Op1Reg == 0) return false;
    923   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
    924     .addReg(Op0Reg)
    925     .addReg(Op1Reg);
    926 
    927   return true;
    928 }
    929 
    930 bool X86FastISel::X86SelectCmp(const Instruction *I) {
    931   const CmpInst *CI = cast<CmpInst>(I);
    932 
    933   MVT VT;
    934   if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    935     return false;
    936 
    937   unsigned ResultReg = createResultReg(&X86::GR8RegClass);
    938   unsigned SetCCOpc;
    939   bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
    940   switch (CI->getPredicate()) {
    941   case CmpInst::FCMP_OEQ: {
    942     if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
    943       return false;
    944 
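             // Ordered-equal sets ZF and clears PF, so AND the SETE and SETNP results.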
    945     unsigned EReg = createResultReg(&X86::GR8RegClass);
    946     unsigned NPReg = createResultReg(&X86::GR8RegClass);
    947     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
    948     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
    949             TII.get(X86::SETNPr), NPReg);
    950     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
    951             TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    952     UpdateValueMap(I, ResultReg);
    953     return true;
    954   }
    955   case CmpInst::FCMP_UNE: {
    956     if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
    957       return false;
    958 
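             // Unordered-or-unequal means ZF clear or PF set, so OR the SETNE and SETP
             // results.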
    959     unsigned NEReg = createResultReg(&X86::GR8RegClass);
    960     unsigned PReg = createResultReg(&X86::GR8RegClass);
    961     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
    962     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
    963     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
    964       .addReg(PReg).addReg(NEReg);
    965     UpdateValueMap(I, ResultReg);
    966     return true;
    967   }
    968   case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
    969   case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
    970   case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
    971   case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
    972   case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
    973   case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
    974   case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
    975   case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
    976   case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
    977   case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
    978   case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
    979   case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
    980 
    981   case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
    982   case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
    983   case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
    984   case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
    985   case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
    986   case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
    987   case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
    988   case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
    989   case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
    990   case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
    991   default:
    992     return false;
    993   }
    994 
    995   const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
    996   if (SwapArgs)
    997     std::swap(Op0, Op1);
    998 
    999   // Emit a compare of Op0/Op1.
   1000   if (!X86FastEmitCompare(Op0, Op1, VT))
   1001     return false;
   1002 
   1003   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
   1004   UpdateValueMap(I, ResultReg);
   1005   return true;
   1006 }
   1007 
   1008 bool X86FastISel::X86SelectZExt(const Instruction *I) {
   1009   // Handle zero-extension from i1 to i8, which is common.
   1010   if (!I->getOperand(0)->getType()->isIntegerTy(1))
   1011     return false;
   1012 
   1013   EVT DstVT = TLI.getValueType(I->getType());
   1014   if (!TLI.isTypeLegal(DstVT))
   1015     return false;
   1016 
   1017   unsigned ResultReg = getRegForValue(I->getOperand(0));
   1018   if (ResultReg == 0)
   1019     return false;
   1020 
   1021   // Set the high bits to zero.
   1022   ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
   1023   if (ResultReg == 0)
   1024     return false;
   1025 
   1026   if (DstVT != MVT::i8) {
   1027     ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
   1028                            ResultReg, /*Kill=*/true);
   1029     if (ResultReg == 0)
   1030       return false;
   1031   }
   1032 
   1033   UpdateValueMap(I, ResultReg);
   1034   return true;
   1035 }
   1036 
   1037 
   1038 bool X86FastISel::X86SelectBranch(const Instruction *I) {
   1039   // Unconditional branches are selected by tablegen-generated code.
   1040   // Handle a conditional branch.
   1041   const BranchInst *BI = cast<BranchInst>(I);
   1042   MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
   1043   MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
   1044 
   1045   // Fold the common case of a conditional branch with a comparison
   1046   // in the same block (values defined on other blocks may not have
   1047   // initialized registers).
   1048   if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
   1049     if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
   1050       EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
   1051 
   1052       // Try to take advantage of fallthrough opportunities.
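               // If the true block is the layout successor, swap the successors and
               // invert the predicate so the fallthrough edge needs no branch.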
   1053       CmpInst::Predicate Predicate = CI->getPredicate();
   1054       if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
   1055         std::swap(TrueMBB, FalseMBB);
   1056         Predicate = CmpInst::getInversePredicate(Predicate);
   1057       }
   1058 
   1059       bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
   1060       unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
   1061 
   1062       switch (Predicate) {
   1063       case CmpInst::FCMP_OEQ:
   1064         std::swap(TrueMBB, FalseMBB);
   1065         Predicate = CmpInst::FCMP_UNE;
   1066         // FALL THROUGH
   1067       case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
   1068       case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
   1069       case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
   1070       case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
   1071       case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
   1072       case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
   1073       case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
   1074       case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
   1075       case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
   1076       case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
   1077       case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
   1078       case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
   1079       case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
   1080 
   1081       case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
   1082       case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
   1083       case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
   1084       case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
   1085       case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
   1086       case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
   1087       case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
   1088       case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
   1089       case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
   1090       case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
   1091       default:
   1092         return false;
   1093       }
   1094 
   1095       const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
   1096       if (SwapArgs)
   1097         std::swap(Op0, Op1);
   1098 
   1099       // Emit a compare of the LHS and RHS, setting the flags.
   1100       if (!X86FastEmitCompare(Op0, Op1, VT))
   1101         return false;
   1102 
   1103       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
   1104         .addMBB(TrueMBB);
   1105 
   1106       if (Predicate == CmpInst::FCMP_UNE) {
   1107         // X86 requires a second branch to handle UNE (and OEQ,
   1108         // which is mapped to UNE above).
   1109         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
   1110           .addMBB(TrueMBB);
   1111       }
   1112 
   1113       FastEmitBranch(FalseMBB, DL);
   1114       FuncInfo.MBB->addSuccessor(TrueMBB);
   1115       return true;
   1116     }
   1117   } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
   1118     // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
   1119     // typically happen for _Bool and C++ bools.
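             // The emitted sequence tests bit 0 of the untruncated value and jumps on
             // the result, so no separate truncate is needed.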
   1120     MVT SourceVT;
   1121     if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
   1122         isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
   1123       unsigned TestOpc = 0;
   1124       switch (SourceVT.SimpleTy) {
   1125       default: break;
   1126       case MVT::i8:  TestOpc = X86::TEST8ri; break;
   1127       case MVT::i16: TestOpc = X86::TEST16ri; break;
   1128       case MVT::i32: TestOpc = X86::TEST32ri; break;
   1129       case MVT::i64: TestOpc = X86::TEST64ri32; break;
   1130       }
   1131       if (TestOpc) {
   1132         unsigned OpReg = getRegForValue(TI->getOperand(0));
   1133         if (OpReg == 0) return false;
   1134         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
   1135           .addReg(OpReg).addImm(1);
   1136 
   1137         unsigned JmpOpc = X86::JNE_4;
   1138         if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
   1139           std::swap(TrueMBB, FalseMBB);
   1140           JmpOpc = X86::JE_4;
   1141         }
   1142 
   1143         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
   1144           .addMBB(TrueMBB);
   1145         FastEmitBranch(FalseMBB, DL);
   1146         FuncInfo.MBB->addSuccessor(TrueMBB);
   1147         return true;
   1148       }
   1149     }
   1150   }
   1151 
   1152   // Otherwise do a clumsy setcc and re-test it.
   1153   // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
   1154   // in an explicit cast, so make sure to handle that correctly.
   1155   unsigned OpReg = getRegForValue(BI->getCondition());
   1156   if (OpReg == 0) return false;
   1157 
   1158   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
   1159     .addReg(OpReg).addImm(1);
   1160   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
   1161     .addMBB(TrueMBB);
   1162   FastEmitBranch(FalseMBB, DL);
   1163   FuncInfo.MBB->addSuccessor(TrueMBB);
   1164   return true;
   1165 }
   1166 
   1167 bool X86FastISel::X86SelectShift(const Instruction *I) {
   1168   unsigned CReg = 0, OpReg = 0;
   1169   const TargetRegisterClass *RC = NULL;
   1170   if (I->getType()->isIntegerTy(8)) {
   1171     CReg = X86::CL;
   1172     RC = &X86::GR8RegClass;
   1173     switch (I->getOpcode()) {
   1174     case Instruction::LShr: OpReg = X86::SHR8rCL; break;
   1175     case Instruction::AShr: OpReg = X86::SAR8rCL; break;
   1176     case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
   1177     default: return false;
   1178     }
   1179   } else if (I->getType()->isIntegerTy(16)) {
   1180     CReg = X86::CX;
   1181     RC = &X86::GR16RegClass;
   1182     switch (I->getOpcode()) {
   1183     case Instruction::LShr: OpReg = X86::SHR16rCL; break;
   1184     case Instruction::AShr: OpReg = X86::SAR16rCL; break;
   1185     case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
   1186     default: return false;
   1187     }
   1188   } else if (I->getType()->isIntegerTy(32)) {
   1189     CReg = X86::ECX;
   1190     RC = &X86::GR32RegClass;
   1191     switch (I->getOpcode()) {
   1192     case Instruction::LShr: OpReg = X86::SHR32rCL; break;
   1193     case Instruction::AShr: OpReg = X86::SAR32rCL; break;
   1194     case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
   1195     default: return false;
   1196     }
   1197   } else if (I->getType()->isIntegerTy(64)) {
   1198     CReg = X86::RCX;
   1199     RC = &X86::GR64RegClass;
   1200     switch (I->getOpcode()) {
   1201     case Instruction::LShr: OpReg = X86::SHR64rCL; break;
   1202     case Instruction::AShr: OpReg = X86::SAR64rCL; break;
   1203     case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
   1204     default: return false;
   1205     }
   1206   } else {
   1207     return false;
   1208   }
   1209 
   1210   MVT VT;
   1211   if (!isTypeLegal(I->getType(), VT))
   1212     return false;
   1213 
   1214   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   1215   if (Op0Reg == 0) return false;
   1216 
   1217   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   1218   if (Op1Reg == 0) return false;
   1219   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
   1220           CReg).addReg(Op1Reg);
   1221 
   1222   // The shift instruction uses X86::CL. If we defined a super-register
   1223   // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
   1224   if (CReg != X86::CL)
   1225     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   1226             TII.get(TargetOpcode::KILL), X86::CL)
   1227       .addReg(CReg, RegState::Kill);
   1228 
   1229   unsigned ResultReg = createResultReg(RC);
   1230   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
   1231     .addReg(Op0Reg);
   1232   UpdateValueMap(I, ResultReg);
   1233   return true;
   1234 }
   1235 
   1236 bool X86FastISel::X86SelectSelect(const Instruction *I) {
   1237   MVT VT;
   1238   if (!isTypeLegal(I->getType(), VT))
   1239     return false;
   1240 
    1241   // We only use cmov here; if we don't have a cmov instruction, bail.
   1242   if (!Subtarget->hasCMov()) return false;
   1243 
   1244   unsigned Opc = 0;
   1245   const TargetRegisterClass *RC = NULL;
   1246   if (VT == MVT::i16) {
   1247     Opc = X86::CMOVE16rr;
   1248     RC = &X86::GR16RegClass;
   1249   } else if (VT == MVT::i32) {
   1250     Opc = X86::CMOVE32rr;
   1251     RC = &X86::GR32RegClass;
   1252   } else if (VT == MVT::i64) {
   1253     Opc = X86::CMOVE64rr;
   1254     RC = &X86::GR64RegClass;
   1255   } else {
   1256     return false;
   1257   }
   1258 
   1259   unsigned Op0Reg = getRegForValue(I->getOperand(0));
   1260   if (Op0Reg == 0) return false;
   1261   unsigned Op1Reg = getRegForValue(I->getOperand(1));
   1262   if (Op1Reg == 0) return false;
   1263   unsigned Op2Reg = getRegForValue(I->getOperand(2));
   1264   if (Op2Reg == 0) return false;
   1265 
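           // TEST sets ZF when the condition is zero; CMOVE then selects the false
           // operand in that case and keeps the true operand otherwise.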
   1266   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
   1267     .addReg(Op0Reg).addReg(Op0Reg);
   1268   unsigned ResultReg = createResultReg(RC);
   1269   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
   1270     .addReg(Op1Reg).addReg(Op2Reg);
   1271   UpdateValueMap(I, ResultReg);
   1272   return true;
   1273 }
   1274 
   1275 bool X86FastISel::X86SelectFPExt(const Instruction *I) {
   1276   // fpext from float to double.
   1277   if (X86ScalarSSEf64 &&
   1278       I->getType()->isDoubleTy()) {
   1279     const Value *V = I->getOperand(0);
   1280     if (V->getType()->isFloatTy()) {
   1281       unsigned OpReg = getRegForValue(V);
   1282       if (OpReg == 0) return false;
   1283       unsigned ResultReg = createResultReg(&X86::FR64RegClass);
   1284       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   1285               TII.get(X86::CVTSS2SDrr), ResultReg)
   1286         .addReg(OpReg);
   1287       UpdateValueMap(I, ResultReg);
   1288       return true;
   1289     }
   1290   }
   1291 
   1292   return false;
   1293 }
   1294 
   1295 bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
   1296   if (X86ScalarSSEf64) {
   1297     if (I->getType()->isFloatTy()) {
   1298       const Value *V = I->getOperand(0);
   1299       if (V->getType()->isDoubleTy()) {
   1300         unsigned OpReg = getRegForValue(V);
   1301         if (OpReg == 0) return false;
   1302         unsigned ResultReg = createResultReg(&X86::FR32RegClass);
   1303         BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   1304                 TII.get(X86::CVTSD2SSrr), ResultReg)
   1305           .addReg(OpReg);
   1306         UpdateValueMap(I, ResultReg);
   1307         return true;
   1308       }
   1309     }
   1310   }
   1311 
   1312   return false;
   1313 }
   1314 
   1315 bool X86FastISel::X86SelectTrunc(const Instruction *I) {
   1316   EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   1317   EVT DstVT = TLI.getValueType(I->getType());
   1318 
   1319   // This code only handles truncation to byte.
   1320   if (DstVT != MVT::i8 && DstVT != MVT::i1)
   1321     return false;
   1322   if (!TLI.isTypeLegal(SrcVT))
   1323     return false;
   1324 
   1325   unsigned InputReg = getRegForValue(I->getOperand(0));
   1326   if (!InputReg)
   1327     // Unhandled operand.  Halt "fast" selection and bail.
   1328     return false;
   1329 
   1330   if (SrcVT == MVT::i8) {
   1331     // Truncate from i8 to i1; no code needed.
   1332     UpdateValueMap(I, InputReg);
   1333     return true;
   1334   }
   1335 
   1336   if (!Subtarget->is64Bit()) {
   1337     // If we're on x86-32, we can't extract an i8 from a general register.
   1338     // First issue a copy to GR16_ABCD or GR32_ABCD.
   1339     const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
   1340       (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
   1341       (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
   1342     unsigned CopyReg = createResultReg(CopyRC);
   1343     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
   1344             CopyReg).addReg(InputReg);
   1345     InputReg = CopyReg;
   1346   }
   1347 
   1348   // Issue an extract_subreg.
   1349   unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
   1350                                                   InputReg, /*Kill=*/true,
   1351                                                   X86::sub_8bit);
   1352   if (!ResultReg)
   1353     return false;
   1354 
   1355   UpdateValueMap(I, ResultReg);
   1356   return true;
   1357 }
   1358 
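        /// IsMemcpySmall - Return true if a memcpy of this length is small enough
        /// to be expanded inline: up to 32 bytes in 64-bit mode, 16 bytes otherwise.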
   1359 bool X86FastISel::IsMemcpySmall(uint64_t Len) {
   1360   return Len <= (Subtarget->is64Bit() ? 32 : 16);
   1361 }
   1362 
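        /// TryEmitSmallMemcpy - Expand a small constant-length memcpy inline as a
        /// sequence of integer loads and stores, using the widest legal chunks first.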
   1363 bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
   1364                                      X86AddressMode SrcAM, uint64_t Len) {
   1365 
   1366   // Make sure we don't bloat code by inlining very large memcpys.
   1367   if (!IsMemcpySmall(Len))
   1368     return false;
   1369 
   1370   bool i64Legal = Subtarget->is64Bit();
   1371 
   1372   // We don't care about alignment here since we just emit integer accesses.
   1373   while (Len) {
   1374     MVT VT;
   1375     if (Len >= 8 && i64Legal)
   1376       VT = MVT::i64;
   1377     else if (Len >= 4)
   1378       VT = MVT::i32;
   1379     else if (Len >= 2)
   1380       VT = MVT::i16;
   1381     else {
   1382       VT = MVT::i8;
   1383     }
   1384 
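            // Copy one chunk of the chosen width, then advance both address modes
            // past the bytes just copied.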
   1385     unsigned Reg;
   1386     bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
   1387     RV &= X86FastEmitStore(VT, Reg, DestAM);
   1388     assert(RV && "Failed to emit load or store??");
   1389 
   1390     unsigned Size = VT.getSizeInBits()/8;
   1391     Len -= Size;
   1392     DestAM.Disp += Size;
   1393     SrcAM.Disp += Size;
   1394   }
   1395 
   1396   return true;
   1397 }
   1398 
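        /// X86VisitIntrinsicCall - Lower the handful of intrinsics fast-isel knows
        /// how to handle directly (memcpy, memset, stackprotector, dbg_declare,
        /// trap, and the add-with-overflow intrinsics); everything else is rejected.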
   1399 bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
   1400   // FIXME: Handle more intrinsics.
   1401   switch (I.getIntrinsicID()) {
   1402   default: return false;
   1403   case Intrinsic::memcpy: {
   1404     const MemCpyInst &MCI = cast<MemCpyInst>(I);
   1405     // Don't handle volatile or variable length memcpys.
   1406     if (MCI.isVolatile())
   1407       return false;
   1408 
   1409     if (isa<ConstantInt>(MCI.getLength())) {
   1410       // Small memcpys are common enough that we want to do them
   1411       // without a call if possible.
   1412       uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
   1413       if (IsMemcpySmall(Len)) {
   1414         X86AddressMode DestAM, SrcAM;
   1415         if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
   1416             !X86SelectAddress(MCI.getRawSource(), SrcAM))
   1417           return false;
   1418         TryEmitSmallMemcpy(DestAM, SrcAM, Len);
   1419         return true;
   1420       }
   1421     }
   1422 
   1423     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
   1424     if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
   1425       return false;
   1426 
   1427     if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
   1428       return false;
   1429 
   1430     return DoSelectCall(&I, "memcpy");
   1431   }
   1432   case Intrinsic::memset: {
   1433     const MemSetInst &MSI = cast<MemSetInst>(I);
   1434 
   1435     if (MSI.isVolatile())
   1436       return false;
   1437 
   1438     unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
   1439     if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
   1440       return false;
   1441 
   1442     if (MSI.getDestAddressSpace() > 255)
   1443       return false;
   1444 
   1445     return DoSelectCall(&I, "memset");
   1446   }
   1447   case Intrinsic::stackprotector: {
   1448     // Emit code to store the stack guard onto the stack.
   1449     EVT PtrTy = TLI.getPointerTy();
   1450 
   1451     const Value *Op1 = I.getArgOperand(0); // The guard's value.
   1452     const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
   1453 
   1454     // Grab the frame index.
   1455     X86AddressMode AM;
   1456     if (!X86SelectAddress(Slot, AM)) return false;
   1457     if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
   1458     return true;
   1459   }
   1460   case Intrinsic::dbg_declare: {
   1461     const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
   1462     X86AddressMode AM;
   1463     assert(DI->getAddress() && "Null address should be checked earlier!");
   1464     if (!X86SelectAddress(DI->getAddress(), AM))
   1465       return false;
   1466     const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
   1467     // FIXME: may need to add RegState::Debug to any registers produced,
   1468     // although ESP/EBP should be the only ones at the moment.
   1469     addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
   1470       addImm(0).addMetadata(DI->getVariable());
   1471     return true;
   1472   }
   1473   case Intrinsic::trap: {
   1474     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
   1475     return true;
   1476   }
   1477   case Intrinsic::sadd_with_overflow:
   1478   case Intrinsic::uadd_with_overflow: {
   1479     // FIXME: Should fold immediates.
   1480 
   1481     // Replace "add with overflow" intrinsics with an "add" instruction followed
   1482     // by a seto/setc instruction.
   1483     const Function *Callee = I.getCalledFunction();
   1484     Type *RetTy =
   1485       cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
   1486 
   1487     MVT VT;
   1488     if (!isTypeLegal(RetTy, VT))
   1489       return false;
   1490 
   1491     const Value *Op1 = I.getArgOperand(0);
   1492     const Value *Op2 = I.getArgOperand(1);
   1493     unsigned Reg1 = getRegForValue(Op1);
   1494     unsigned Reg2 = getRegForValue(Op2);
   1495 
   1496     if (Reg1 == 0 || Reg2 == 0)
   1497       // FIXME: Handle values *not* in registers.
   1498       return false;
   1499 
   1500     unsigned OpC = 0;
   1501     if (VT == MVT::i32)
   1502       OpC = X86::ADD32rr;
   1503     else if (VT == MVT::i64)
   1504       OpC = X86::ADD64rr;
   1505     else
   1506       return false;
   1507 
   1508     // The call to CreateRegs builds two sequential registers to store
   1509     // both of the returned values.
   1510     unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
   1511     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
   1512       .addReg(Reg1).addReg(Reg2);
   1513 
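            // Materialize the overflow bit into the second of the two sequential
            // registers: SETB reads the carry flag for unsigned adds, SETO reads
            // the overflow flag for signed adds.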
   1514     unsigned Opc = X86::SETBr;
   1515     if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
   1516       Opc = X86::SETOr;
   1517     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
   1518 
   1519     UpdateValueMap(&I, ResultReg, 2);
   1520     return true;
   1521   }
   1522   }
   1523 }
   1524 
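        /// FastLowerArguments - Try to lower the incoming formal arguments in place.
        /// Only the simplest case is handled: a non-varargs 64-bit C-convention
        /// function with up to six i32/i64 scalar arguments and no special
        /// parameter attributes.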
   1525 bool X86FastISel::FastLowerArguments() {
   1526   if (!FuncInfo.CanLowerReturn)
   1527     return false;
   1528 
   1529   if (Subtarget->isTargetWindows())
   1530     return false;
   1531 
   1532   const Function *F = FuncInfo.Fn;
   1533   if (F->isVarArg())
   1534     return false;
   1535 
   1536   CallingConv::ID CC = F->getCallingConv();
   1537   if (CC != CallingConv::C)
   1538     return false;
   1539 
   1540   if (!Subtarget->is64Bit())
   1541     return false;
   1542 
   1543   // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
   1544   unsigned Idx = 1;
   1545   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
   1546        I != E; ++I, ++Idx) {
   1547     if (Idx > 6)
   1548       return false;
   1549 
   1550     if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
   1551         F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
   1552         F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
   1553         F->getAttributes().hasAttribute(Idx, Attribute::Nest))
   1554       return false;
   1555 
   1556     Type *ArgTy = I->getType();
   1557     if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
   1558       return false;
   1559 
   1560     EVT ArgVT = TLI.getValueType(ArgTy);
   1561     if (!ArgVT.isSimple()) return false;
   1562     switch (ArgVT.getSimpleVT().SimpleTy) {
   1563     case MVT::i32:
   1564     case MVT::i64:
   1565       break;
   1566     default:
   1567       return false;
   1568     }
   1569   }
   1570 
   1571   static const uint16_t GPR32ArgRegs[] = {
   1572     X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
   1573   };
   1574   static const uint16_t GPR64ArgRegs[] = {
   1575     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
   1576   };
   1577 
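          // Second pass: bind each used argument to the next integer argument
          // register (SysV x86-64 order, as listed above) and copy it into a vreg.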
   1578   Idx = 0;
   1579   const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
   1580   const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
   1581   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
   1582        I != E; ++I, ++Idx) {
   1583     if (I->use_empty())
   1584       continue;
   1585     bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
   1586     const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
   1587     unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
   1588     unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
   1589     // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
   1590     // Without this, EmitLiveInCopies may eliminate the livein if its only
   1591     // use is a bitcast (which isn't turned into an instruction).
   1592     unsigned ResultReg = createResultReg(RC);
   1593     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
   1594             ResultReg).addReg(DstReg, getKillRegState(true));
   1595     UpdateValueMap(I, ResultReg);
   1596   }
   1597   return true;
   1598 }
   1599 
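        /// X86SelectCall - Select a call instruction.  Intrinsics are dispatched to
        /// X86VisitIntrinsicCall; inline asm and tail calls are left to SelectionDAG.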
   1600 bool X86FastISel::X86SelectCall(const Instruction *I) {
   1601   const CallInst *CI = cast<CallInst>(I);
   1602   const Value *Callee = CI->getCalledValue();
   1603 
   1604   // Can't handle inline asm yet.
   1605   if (isa<InlineAsm>(Callee))
   1606     return false;
   1607 
   1608   // Handle intrinsic calls.
   1609   if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
   1610     return X86VisitIntrinsicCall(*II);
   1611 
   1612   // Allow SelectionDAG isel to handle tail calls.
   1613   if (cast<CallInst>(I)->isTailCall())
   1614     return false;
   1615 
   1616   return DoSelectCall(I, 0);
   1617 }
   1618 
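        /// computeBytesPoppedByCallee - Return the number of argument bytes the
        /// callee pops on return.  Only 32-bit, non-Windows callees taking an sret
        /// pointer on the stack pop anything, namely that 4-byte hidden argument.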
   1619 static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
   1620                                            const ImmutableCallSite &CS) {
   1621   if (Subtarget.is64Bit())
   1622     return 0;
   1623   if (Subtarget.isTargetWindows())
   1624     return 0;
   1625   CallingConv::ID CC = CS.getCallingConv();
   1626   if (CC == CallingConv::Fast || CC == CallingConv::GHC)
   1627     return 0;
   1628   if (!CS.paramHasAttr(1, Attribute::StructRet))
   1629     return 0;
   1630   if (CS.paramHasAttr(1, Attribute::InReg))
   1631     return 0;
   1632   return 4;
   1633 }
   1634 
   1635 // Select either a call or an llvm.memcpy/memmove/memset intrinsic.
   1636 bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   1637   const CallInst *CI = cast<CallInst>(I);
   1638   const Value *Callee = CI->getCalledValue();
   1639 
   1640   // Handle only C and fastcc calling conventions for now.
   1641   ImmutableCallSite CS(CI);
   1642   CallingConv::ID CC = CS.getCallingConv();
   1643   if (CC != CallingConv::C && CC != CallingConv::Fast &&
   1644       CC != CallingConv::X86_FastCall)
   1645     return false;
   1646 
   1647   // fastcc with -tailcallopt is intended to provide a guaranteed
   1648   // tail call optimization. Fastisel doesn't know how to do that.
   1649   if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
   1650     return false;
   1651 
   1652   PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
   1653   FunctionType *FTy = cast<FunctionType>(PT->getElementType());
   1654   bool isVarArg = FTy->isVarArg();
   1655 
   1656   // Don't know how to handle Win64 varargs yet.  Nothing special is needed
   1657   // for x86-32, and the special handling for non-Win64 x86-64 is implemented.
   1658   if (isVarArg && Subtarget->isTargetWin64())
   1659     return false;
   1660 
   1661   // Fast-isel doesn't know about callee-pop yet.
   1662   if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
   1663                        TM.Options.GuaranteedTailCallOpt))
   1664     return false;
   1665 
   1666   // Check whether the function can return without sret-demotion.
   1667   SmallVector<ISD::OutputArg, 4> Outs;
   1668   GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
   1669   bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
   1670                                            *FuncInfo.MF, FTy->isVarArg(),
   1671                                            Outs, FTy->getContext());
   1672   if (!CanLowerReturn)
   1673     return false;
   1674 
   1675   // Materialize callee address in a register. FIXME: GV address can be
   1676   // handled with a CALLpcrel32 instead.
   1677   X86AddressMode CalleeAM;
   1678   if (!X86SelectCallAddress(Callee, CalleeAM))
   1679     return false;
   1680   unsigned CalleeOp = 0;
   1681   const GlobalValue *GV = 0;
   1682   if (CalleeAM.GV != 0) {
   1683     GV = CalleeAM.GV;
   1684   } else if (CalleeAM.Base.Reg != 0) {
   1685     CalleeOp = CalleeAM.Base.Reg;
   1686   } else
   1687     return false;
   1688 
   1689   // Deal with call operands first.
   1690   SmallVector<const Value *, 8> ArgVals;
   1691   SmallVector<unsigned, 8> Args;
   1692   SmallVector<MVT, 8> ArgVTs;
   1693   SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
   1694   unsigned arg_size = CS.arg_size();
   1695   Args.reserve(arg_size);
   1696   ArgVals.reserve(arg_size);
   1697   ArgVTs.reserve(arg_size);
   1698   ArgFlags.reserve(arg_size);
   1699   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
   1700        i != e; ++i) {
   1701     // If we're lowering a mem intrinsic instead of a regular call, skip the
   1702     // last two arguments, which should not be passed to the underlying functions.
   1703     if (MemIntName && e-i <= 2)
   1704       break;
   1705     Value *ArgVal = *i;
   1706     ISD::ArgFlagsTy Flags;
   1707     unsigned AttrInd = i - CS.arg_begin() + 1;
   1708     if (CS.paramHasAttr(AttrInd, Attribute::SExt))
   1709       Flags.setSExt();
   1710     if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
   1711       Flags.setZExt();
   1712 
   1713     if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
   1714       PointerType *Ty = cast<PointerType>(ArgVal->getType());
   1715       Type *ElementTy = Ty->getElementType();
   1716       unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
   1717       unsigned FrameAlign = CS.getParamAlignment(AttrInd);
   1718       if (!FrameAlign)
   1719         FrameAlign = TLI.getByValTypeAlignment(ElementTy);
   1720       Flags.setByVal();
   1721       Flags.setByValSize(FrameSize);
   1722       Flags.setByValAlign(FrameAlign);
   1723       if (!IsMemcpySmall(FrameSize))
   1724         return false;
   1725     }
   1726 
   1727     if (CS.paramHasAttr(AttrInd, Attribute::InReg))
   1728       Flags.setInReg();
   1729     if (CS.paramHasAttr(AttrInd, Attribute::Nest))
   1730       Flags.setNest();
   1731 
   1732     // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
   1733     // instruction.  This is safe because it is common to all fastisel-supported
   1734     // calling conventions on x86.
   1735     if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
   1736       if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
   1737           CI->getBitWidth() == 16) {
   1738         if (Flags.isSExt())
   1739           ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
   1740         else
   1741           ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
   1742       }
   1743     }
   1744 
   1745     unsigned ArgReg;
   1746 
   1747     // Passing bools around ends up doing a trunc to i1 and passing it.
   1748     // Codegen this as an argument + "and 1".
   1749     if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
   1750         cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
   1751         ArgVal->hasOneUse()) {
   1752       ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
   1753       ArgReg = getRegForValue(ArgVal);
   1754       if (ArgReg == 0) return false;
   1755 
   1756       MVT ArgVT;
   1757       if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
   1758 
   1759       ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
   1760                            ArgVal->hasOneUse(), 1);
   1761     } else {
   1762       ArgReg = getRegForValue(ArgVal);
   1763     }
   1764 
   1765     if (ArgReg == 0) return false;
   1766 
   1767     Type *ArgTy = ArgVal->getType();
   1768     MVT ArgVT;
   1769     if (!isTypeLegal(ArgTy, ArgVT))
   1770       return false;
   1771     if (ArgVT == MVT::x86mmx)
   1772       return false;
   1773     unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
   1774     Flags.setOrigAlign(OriginalAlignment);
   1775 
   1776     Args.push_back(ArgReg);
   1777     ArgVals.push_back(ArgVal);
   1778     ArgVTs.push_back(ArgVT);
   1779     ArgFlags.push_back(Flags);
   1780   }
   1781 
   1782   // Analyze operands of the call, assigning locations to each operand.
   1783   SmallVector<CCValAssign, 16> ArgLocs;
   1784   CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
   1785                  I->getParent()->getContext());
   1786 
   1787   // Allocate shadow area for Win64
   1788   if (Subtarget->isTargetWin64())
   1789     CCInfo.AllocateStack(32, 8);
   1790 
   1791   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
   1792 
   1793   // Get a count of how many bytes are to be pushed on the stack.
   1794   unsigned NumBytes = CCInfo.getNextStackOffset();
   1795 
   1796   // Issue CALLSEQ_START
   1797   unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
   1798   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
   1799     .addImm(NumBytes);
   1800 
   1801   // Process arguments: walk the register/memloc assignments, inserting
   1802   // copies / loads.
   1803   SmallVector<unsigned, 4> RegArgs;
   1804   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   1805     CCValAssign &VA = ArgLocs[i];
   1806     unsigned Arg = Args[VA.getValNo()];
   1807     EVT ArgVT = ArgVTs[VA.getValNo()];
   1808 
   1809     // Promote the value if needed.
   1810     switch (VA.getLocInfo()) {
   1811     case CCValAssign::Full: break;
   1812     case CCValAssign::SExt: {
   1813       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
   1814              "Unexpected extend");
   1815       bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
   1816                                        Arg, ArgVT, Arg);
   1817       assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
   1818       ArgVT = VA.getLocVT();
   1819       break;
   1820     }
   1821     case CCValAssign::ZExt: {
   1822       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
   1823              "Unexpected extend");
   1824       bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
   1825                                        Arg, ArgVT, Arg);
   1826       assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
   1827       ArgVT = VA.getLocVT();
   1828       break;
   1829     }
   1830     case CCValAssign::AExt: {
   1831       assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
   1832              "Unexpected extend");
   1833       bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
   1834                                        Arg, ArgVT, Arg);
   1835       if (!Emitted)
   1836         Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
   1837                                     Arg, ArgVT, Arg);
   1838       if (!Emitted)
   1839         Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
   1840                                     Arg, ArgVT, Arg);
   1841 
   1842       assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
   1843       ArgVT = VA.getLocVT();
   1844       break;
   1845     }
   1846     case CCValAssign::BCvt: {
   1847       unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
   1848                                ISD::BITCAST, Arg, /*TODO: Kill=*/false);
   1849       assert(BC != 0 && "Failed to emit a bitcast!");
   1850       Arg = BC;
   1851       ArgVT = VA.getLocVT();
   1852       break;
   1853     }
   1854     case CCValAssign::VExt:
   1855       // VExt has not been implemented, so this should be impossible to reach
   1856       // for now.  However, fallback to Selection DAG isel once implemented.
   1857       return false;
   1858     case CCValAssign::Indirect:
   1859       // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
   1860       // support this.
   1861       return false;
   1862     }
   1863 
   1864     if (VA.isRegLoc()) {
   1865       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
   1866               VA.getLocReg()).addReg(Arg);
   1867       RegArgs.push_back(VA.getLocReg());
   1868     } else {
   1869       unsigned LocMemOffset = VA.getLocMemOffset();
   1870       X86AddressMode AM;
   1871       AM.Base.Reg = RegInfo->getStackRegister();
   1872       AM.Disp = LocMemOffset;
   1873       const Value *ArgVal = ArgVals[VA.getValNo()];
   1874       ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
   1875 
   1876       if (Flags.isByVal()) {
   1877         X86AddressMode SrcAM;
   1878         SrcAM.Base.Reg = Arg;
   1879         bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
   1880         assert(Res && "memcpy length already checked!"); (void)Res;
   1881       } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
   1882         // If this is a really simple value, emit this with the Value* version
   1883         // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
   1884         // as it can cause us to reevaluate the argument.
   1885         if (!X86FastEmitStore(ArgVT, ArgVal, AM))
   1886           return false;
   1887       } else {
   1888         if (!X86FastEmitStore(ArgVT, Arg, AM))
   1889           return false;
   1890       }
   1891     }
   1892   }
   1893 
   1894   // ELF / PIC requires the GOT pointer to be in EBX before making function
   1895   // calls via the PLT.
   1896   if (Subtarget->isPICStyleGOT()) {
   1897     unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
   1898     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
   1899             X86::EBX).addReg(Base);
   1900   }
   1901 
   1902   if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
   1903     // Count the number of XMM registers allocated.
   1904     static const uint16_t XMMArgRegs[] = {
   1905       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
   1906       X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
   1907     };
   1908     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
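            // For varargs calls the x86-64 SysV ABI expects AL to hold an upper
            // bound on the number of vector registers used in the call.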
   1909     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
   1910             X86::AL).addImm(NumXMMRegs);
   1911   }
   1912 
   1913   // Issue the call.
   1914   MachineInstrBuilder MIB;
   1915   if (CalleeOp) {
   1916     // Register-indirect call.
   1917     unsigned CallOpc;
   1918     if (Subtarget->is64Bit())
   1919       CallOpc = X86::CALL64r;
   1920     else
   1921       CallOpc = X86::CALL32r;
   1922     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
   1923       .addReg(CalleeOp);
   1924 
   1925   } else {
   1926     // Direct call.
   1927     assert(GV && "Not a direct call");
   1928     unsigned CallOpc;
   1929     if (Subtarget->is64Bit())
   1930       CallOpc = X86::CALL64pcrel32;
   1931     else
   1932       CallOpc = X86::CALLpcrel32;
   1933 
   1934     // See if we need any target-specific flags on the GV operand.
   1935     unsigned char OpFlags = 0;
   1936 
   1937     // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
   1938     // external symbols must go through the PLT in PIC mode.  If the symbol
   1939     // has hidden or protected visibility, or if it is static or local, then
   1940     // we don't need to use the PLT - we can directly call it.
   1941     if (Subtarget->isTargetELF() &&
   1942         TM.getRelocationModel() == Reloc::PIC_ &&
   1943         GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
   1944       OpFlags = X86II::MO_PLT;
   1945     } else if (Subtarget->isPICStyleStubAny() &&
   1946                (GV->isDeclaration() || GV->isWeakForLinker()) &&
   1947                (!Subtarget->getTargetTriple().isMacOSX() ||
   1948                 Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
   1949       // PC-relative references to external symbols should go through $stub,
   1950       // unless we're building with the Leopard linker or later, which
   1951       // automatically synthesizes these stubs.
   1952       OpFlags = X86II::MO_DARWIN_STUB;
   1953     }
   1954 
   1955 
   1956     MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
   1957     if (MemIntName)
   1958       MIB.addExternalSymbol(MemIntName, OpFlags);
   1959     else
   1960       MIB.addGlobalAddress(GV, 0, OpFlags);
   1961   }
   1962 
   1963   // Add a register mask with the call-preserved registers.
   1964   // Proper defs for return values will be added by setPhysRegsDeadExcept().
   1965   MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
   1966 
   1967   // Add an implicit use GOT pointer in EBX.
   1968   if (Subtarget->isPICStyleGOT())
   1969     MIB.addReg(X86::EBX, RegState::Implicit);
   1970 
   1971   if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
   1972     MIB.addReg(X86::AL, RegState::Implicit);
   1973 
   1974   // Add implicit physical register uses to the call.
   1975   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
   1976     MIB.addReg(RegArgs[i], RegState::Implicit);
   1977 
   1978   // Issue CALLSEQ_END
   1979   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   1980   const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
   1981   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
   1982     .addImm(NumBytes).addImm(NumBytesCallee);
   1983 
   1984   // Build info for return calling conv lowering code.
   1985   // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
   1986   SmallVector<ISD::InputArg, 32> Ins;
   1987   SmallVector<EVT, 4> RetTys;
   1988   ComputeValueVTs(TLI, I->getType(), RetTys);
   1989   for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
   1990     EVT VT = RetTys[i];
   1991     MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
   1992     unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
   1993     for (unsigned j = 0; j != NumRegs; ++j) {
   1994       ISD::InputArg MyFlags;
   1995       MyFlags.VT = RegisterVT;
   1996       MyFlags.Used = !CS.getInstruction()->use_empty();
   1997       if (CS.paramHasAttr(0, Attribute::SExt))
   1998         MyFlags.Flags.setSExt();
   1999       if (CS.paramHasAttr(0, Attribute::ZExt))
   2000         MyFlags.Flags.setZExt();
   2001       if (CS.paramHasAttr(0, Attribute::InReg))
   2002         MyFlags.Flags.setInReg();
   2003       Ins.push_back(MyFlags);
   2004     }
   2005   }
   2006 
   2007   // Now handle call return values.
   2008   SmallVector<unsigned, 4> UsedRegs;
   2009   SmallVector<CCValAssign, 16> RVLocs;
   2010   CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
   2011                     I->getParent()->getContext());
   2012   unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
   2013   CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
   2014   for (unsigned i = 0; i != RVLocs.size(); ++i) {
   2015     EVT CopyVT = RVLocs[i].getValVT();
   2016     unsigned CopyReg = ResultReg + i;
   2017 
   2018     // If this is a call to a function that returns an fp value on the x87 fp
   2019     // stack, but where we prefer to use the value in xmm registers, copy it
   2020     // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
   2021     if ((RVLocs[i].getLocReg() == X86::ST0 ||
   2022          RVLocs[i].getLocReg() == X86::ST1)) {
   2023       if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
   2024         CopyVT = MVT::f80;
   2025         CopyReg = createResultReg(&X86::RFP80RegClass);
   2026       }
   2027       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
   2028               CopyReg);
   2029     } else {
   2030       BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
   2031               CopyReg).addReg(RVLocs[i].getLocReg());
   2032       UsedRegs.push_back(RVLocs[i].getLocReg());
   2033     }
   2034 
   2035     if (CopyVT != RVLocs[i].getValVT()) {
   2036       // Round the F80 to the right size, which also moves it to the appropriate xmm
   2037       // register. This is accomplished by storing the F80 value in memory and
   2038       // then loading it back. Ewww...
   2039       EVT ResVT = RVLocs[i].getValVT();
   2040       unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
   2041       unsigned MemSize = ResVT.getSizeInBits()/8;
   2042       int FI = MFI.CreateStackObject(MemSize, MemSize, false);
   2043       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   2044                                 TII.get(Opc)), FI)
   2045         .addReg(CopyReg);
   2046       Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
   2047       addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   2048                                 TII.get(Opc), ResultReg + i), FI);
   2049     }
   2050   }
   2051 
   2052   if (RVLocs.size())
   2053     UpdateValueMap(I, ResultReg, RVLocs.size());
   2054 
   2055   // Set all unused physreg defs as dead.
   2056   static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
   2057 
   2058   return true;
   2059 }
   2060 
   2061 
   2062 bool
   2063 X86FastISel::TargetSelectInstruction(const Instruction *I) {
   2064   switch (I->getOpcode()) {
   2065   default: break;
   2066   case Instruction::Load:
   2067     return X86SelectLoad(I);
   2068   case Instruction::Store:
   2069     return X86SelectStore(I);
   2070   case Instruction::Ret:
   2071     return X86SelectRet(I);
   2072   case Instruction::ICmp:
   2073   case Instruction::FCmp:
   2074     return X86SelectCmp(I);
   2075   case Instruction::ZExt:
   2076     return X86SelectZExt(I);
   2077   case Instruction::Br:
   2078     return X86SelectBranch(I);
   2079   case Instruction::Call:
   2080     return X86SelectCall(I);
   2081   case Instruction::LShr:
   2082   case Instruction::AShr:
   2083   case Instruction::Shl:
   2084     return X86SelectShift(I);
   2085   case Instruction::Select:
   2086     return X86SelectSelect(I);
   2087   case Instruction::Trunc:
   2088     return X86SelectTrunc(I);
   2089   case Instruction::FPExt:
   2090     return X86SelectFPExt(I);
   2091   case Instruction::FPTrunc:
   2092     return X86SelectFPTrunc(I);
   2093   case Instruction::IntToPtr: // Deliberate fall-through.
   2094   case Instruction::PtrToInt: {
   2095     EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
   2096     EVT DstVT = TLI.getValueType(I->getType());
   2097     if (DstVT.bitsGT(SrcVT))
   2098       return X86SelectZExt(I);
   2099     if (DstVT.bitsLT(SrcVT))
   2100       return X86SelectTrunc(I);
   2101     unsigned Reg = getRegForValue(I->getOperand(0));
   2102     if (Reg == 0) return false;
   2103     UpdateValueMap(I, Reg);
   2104     return true;
   2105   }
   2106   }
   2107 
   2108   return false;
   2109 }
   2110 
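        /// TargetMaterializeConstant - Materialize a constant into a register: LEA
        /// the address for globals, otherwise load the value from the constant pool.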
   2111 unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
   2112   MVT VT;
   2113   if (!isTypeLegal(C->getType(), VT))
   2114     return 0;
   2115 
   2116   // Can't handle alternate code models yet.
   2117   if (TM.getCodeModel() != CodeModel::Small)
   2118     return 0;
   2119 
   2120   // Get opcode and regclass of the output for the given load instruction.
   2121   unsigned Opc = 0;
   2122   const TargetRegisterClass *RC = NULL;
   2123   switch (VT.SimpleTy) {
   2124   default: return 0;
   2125   case MVT::i8:
   2126     Opc = X86::MOV8rm;
   2127     RC  = &X86::GR8RegClass;
   2128     break;
   2129   case MVT::i16:
   2130     Opc = X86::MOV16rm;
   2131     RC  = &X86::GR16RegClass;
   2132     break;
   2133   case MVT::i32:
   2134     Opc = X86::MOV32rm;
   2135     RC  = &X86::GR32RegClass;
   2136     break;
   2137   case MVT::i64:
   2138     // Must be in x86-64 mode.
   2139     Opc = X86::MOV64rm;
   2140     RC  = &X86::GR64RegClass;
   2141     break;
   2142   case MVT::f32:
   2143     if (X86ScalarSSEf32) {
   2144       Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
   2145       RC  = &X86::FR32RegClass;
   2146     } else {
   2147       Opc = X86::LD_Fp32m;
   2148       RC  = &X86::RFP32RegClass;
   2149     }
   2150     break;
   2151   case MVT::f64:
   2152     if (X86ScalarSSEf64) {
   2153       Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
   2154       RC  = &X86::FR64RegClass;
   2155     } else {
   2156       Opc = X86::LD_Fp64m;
   2157       RC  = &X86::RFP64RegClass;
   2158     }
   2159     break;
   2160   case MVT::f80:
   2161     // No f80 support yet.
   2162     return 0;
   2163   }
   2164 
   2165   // Materialize addresses with LEA instructions.
   2166   if (isa<GlobalValue>(C)) {
   2167     X86AddressMode AM;
   2168     if (X86SelectAddress(C, AM)) {
   2169       // If the expression is just a basereg, then we're done; otherwise we need
   2170       // to emit an LEA.
   2171       if (AM.BaseType == X86AddressMode::RegBase &&
   2172           AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
   2173         return AM.Base.Reg;
   2174 
   2175       Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
   2176       unsigned ResultReg = createResultReg(RC);
   2177       addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   2178                              TII.get(Opc), ResultReg), AM);
   2179       return ResultReg;
   2180     }
   2181     return 0;
   2182   }
   2183 
   2184   // MachineConstantPool wants an explicit alignment.
   2185   unsigned Align = TD.getPrefTypeAlignment(C->getType());
   2186   if (Align == 0) {
   2187     // Alignment of vector types.  FIXME!
   2188     Align = TD.getTypeAllocSize(C->getType());
   2189   }
   2190 
   2191   // x86-32 PIC requires a PIC base register for constant pools.
   2192   unsigned PICBase = 0;
   2193   unsigned char OpFlag = 0;
   2194   if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
   2195     OpFlag = X86II::MO_PIC_BASE_OFFSET;
   2196     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
   2197   } else if (Subtarget->isPICStyleGOT()) {
   2198     OpFlag = X86II::MO_GOTOFF;
   2199     PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
   2200   } else if (Subtarget->isPICStyleRIPRel() &&
   2201              TM.getCodeModel() == CodeModel::Small) {
   2202     PICBase = X86::RIP;
   2203   }
   2204 
   2205   // Create the load from the constant pool.
   2206   unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
   2207   unsigned ResultReg = createResultReg(RC);
   2208   addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   2209                                    TII.get(Opc), ResultReg),
   2210                            MCPOffset, PICBase, OpFlag);
   2211 
   2212   return ResultReg;
   2213 }
   2214 
   2215 unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
   2216   // Fail on dynamic allocas. At this point, getRegForValue has already
   2217   // checked its CSE maps, so if we're here trying to handle a dynamic
   2218   // alloca, we're not going to succeed. X86SelectAddress has a
   2219   // check for dynamic allocas, because it's called directly from
   2220   // various places, but TargetMaterializeAlloca also needs a check
   2221   // in order to avoid recursion between getRegForValue,
   2222   // X86SelectAddress, and TargetMaterializeAlloca.
   2223   if (!FuncInfo.StaticAllocaMap.count(C))
   2224     return 0;
   2225 
   2226   X86AddressMode AM;
   2227   if (!X86SelectAddress(C, AM))
   2228     return 0;
   2229   unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
   2230   const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
   2231   unsigned ResultReg = createResultReg(RC);
   2232   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
   2233                          TII.get(Opc), ResultReg), AM);
   2234   return ResultReg;
   2235 }
   2236 
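        /// TargetMaterializeFloatZero - Materialize +0.0 without a constant-pool
        /// load: FsFLD0SS/FsFLD0SD with SSE, LD_Fp032/LD_Fp064 with x87.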
   2237 unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
   2238   MVT VT;
   2239   if (!isTypeLegal(CF->getType(), VT))
   2240     return 0;
   2241 
   2242   // Get opcode and regclass for the given zero.
   2243   unsigned Opc = 0;
   2244   const TargetRegisterClass *RC = NULL;
   2245   switch (VT.SimpleTy) {
   2246   default: return 0;
   2247   case MVT::f32:
   2248     if (X86ScalarSSEf32) {
   2249       Opc = X86::FsFLD0SS;
   2250       RC  = &X86::FR32RegClass;
   2251     } else {
   2252       Opc = X86::LD_Fp032;
   2253       RC  = &X86::RFP32RegClass;
   2254     }
   2255     break;
   2256   case MVT::f64:
   2257     if (X86ScalarSSEf64) {
   2258       Opc = X86::FsFLD0SD;
   2259       RC  = &X86::FR64RegClass;
   2260     } else {
   2261       Opc = X86::LD_Fp064;
   2262       RC  = &X86::RFP64RegClass;
   2263     }
   2264     break;
   2265   case MVT::f80:
   2266     // No f80 support yet.
   2267     return 0;
   2268   }
   2269 
   2270   unsigned ResultReg = createResultReg(RC);
   2271   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
   2272   return ResultReg;
   2273 }
   2274 
   2275 
   2276 /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
   2277 /// vreg is being provided by the specified load instruction.  If possible,
   2278 /// try to fold the load as an operand to the instruction, returning true if
   2279 /// possible.
   2280 bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
   2281                                 const LoadInst *LI) {
   2282   X86AddressMode AM;
   2283   if (!X86SelectAddress(LI->getOperand(0), AM))
   2284     return false;
   2285 
   2286   const X86InstrInfo &XII = (const X86InstrInfo&)TII;
   2287 
   2288   unsigned Size = TD.getTypeAllocSize(LI->getType());
   2289   unsigned Alignment = LI->getAlignment();
   2290 
   2291   SmallVector<MachineOperand, 8> AddrOps;
   2292   AM.getFullAddress(AddrOps);
   2293 
   2294   MachineInstr *Result =
   2295     XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
   2296   if (Result == 0) return false;
   2297 
   2298   FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
   2299   MI->eraseFromParent();
   2300   return true;
   2301 }
   2302 
   2303 
   2304 namespace llvm {
   2305   FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
   2306                                 const TargetLibraryInfo *libInfo) {
   2307     return new X86FastISel(funcInfo, libInfo);
   2308   }
   2309 }
   2310