      1 //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file defines the interfaces that X86 uses to lower LLVM code into a
     11 // selection DAG.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "X86ISelLowering.h"
     16 #include "Utils/X86ShuffleDecode.h"
     17 #include "X86CallingConv.h"
     18 #include "X86FrameLowering.h"
     19 #include "X86InstrBuilder.h"
     20 #include "X86MachineFunctionInfo.h"
     21 #include "X86ShuffleDecodeConstantPool.h"
     22 #include "X86TargetMachine.h"
     23 #include "X86TargetObjectFile.h"
     24 #include "llvm/ADT/SmallBitVector.h"
     25 #include "llvm/ADT/SmallSet.h"
     26 #include "llvm/ADT/Statistic.h"
     27 #include "llvm/ADT/StringExtras.h"
     28 #include "llvm/ADT/StringSwitch.h"
     29 #include "llvm/Analysis/EHPersonalities.h"
     30 #include "llvm/CodeGen/IntrinsicLowering.h"
     31 #include "llvm/CodeGen/MachineFrameInfo.h"
     32 #include "llvm/CodeGen/MachineFunction.h"
     33 #include "llvm/CodeGen/MachineInstrBuilder.h"
     34 #include "llvm/CodeGen/MachineJumpTableInfo.h"
     35 #include "llvm/CodeGen/MachineModuleInfo.h"
     36 #include "llvm/CodeGen/MachineRegisterInfo.h"
     37 #include "llvm/CodeGen/WinEHFuncInfo.h"
     38 #include "llvm/IR/CallSite.h"
     39 #include "llvm/IR/CallingConv.h"
     40 #include "llvm/IR/Constants.h"
     41 #include "llvm/IR/DerivedTypes.h"
     42 #include "llvm/IR/Function.h"
     43 #include "llvm/IR/GlobalAlias.h"
     44 #include "llvm/IR/GlobalVariable.h"
     45 #include "llvm/IR/Instructions.h"
     46 #include "llvm/IR/Intrinsics.h"
     47 #include "llvm/MC/MCAsmInfo.h"
     48 #include "llvm/MC/MCContext.h"
     49 #include "llvm/MC/MCExpr.h"
     50 #include "llvm/MC/MCSymbol.h"
     51 #include "llvm/Support/CommandLine.h"
     52 #include "llvm/Support/Debug.h"
     53 #include "llvm/Support/ErrorHandling.h"
     54 #include "llvm/Support/MathExtras.h"
     55 #include "llvm/Target/TargetOptions.h"
     56 #include "X86IntrinsicsInfo.h"
     57 #include <bitset>
     58 #include <numeric>
     59 #include <cctype>
     60 using namespace llvm;
     61 
     62 #define DEBUG_TYPE "x86-isel"
     63 
     64 STATISTIC(NumTailCalls, "Number of tail calls");
     65 
     66 static cl::opt<bool> ExperimentalVectorWideningLegalization(
     67     "x86-experimental-vector-widening-legalization", cl::init(false),
     68     cl::desc("Enable an experimental vector type legalization through widening "
     69              "rather than promotion."),
     70     cl::Hidden);
     71 
     72 X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     73                                      const X86Subtarget &STI)
     74     : TargetLowering(TM), Subtarget(STI) {
     75   bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
     76   X86ScalarSSEf64 = Subtarget.hasSSE2();
     77   X86ScalarSSEf32 = Subtarget.hasSSE1();
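           // Pointer-width integer type: i32 in 32-bit mode, i64 in 64-bit mode.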
     78   MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
     79 
     80   // Set up the TargetLowering object.
     81 
     82   // X86 is weird. It always uses i8 for shift amounts and setcc results.
     83   setBooleanContents(ZeroOrOneBooleanContent);
     84   // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
     85   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
     86 
     87   // For 64-bit, since we have so many registers, use the ILP scheduler.
      88   // For 32-bit, use register-pressure-specific scheduling.
     89   // For Atom, always use ILP scheduling.
     90   if (Subtarget.isAtom())
     91     setSchedulingPreference(Sched::ILP);
     92   else if (Subtarget.is64Bit())
     93     setSchedulingPreference(Sched::ILP);
     94   else
     95     setSchedulingPreference(Sched::RegPressure);
     96   const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
     97   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
     98 
      99   // Bypass expensive divides (e.g. on Atom) when compiling at -O2 or higher.
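           // (addBypassSlowDiv(N, M) arranges a run-time check so that an M-bit divide
           // is used when both N-bit operands fit into M bits.)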
    100   if (TM.getOptLevel() >= CodeGenOpt::Default) {
    101     if (Subtarget.hasSlowDivide32())
    102       addBypassSlowDiv(32, 8);
    103     if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
    104       addBypassSlowDiv(64, 16);
    105   }
    106 
    107   if (Subtarget.isTargetKnownWindowsMSVC()) {
    108     // Setup Windows compiler runtime calls.
    109     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
    110     setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
    111     setLibcallName(RTLIB::SREM_I64, "_allrem");
    112     setLibcallName(RTLIB::UREM_I64, "_aullrem");
    113     setLibcallName(RTLIB::MUL_I64, "_allmul");
    114     setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
    115     setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
    116     setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
    117     setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
    118     setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
    119   }
    120 
    121   if (Subtarget.isTargetDarwin()) {
    122     // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
    123     setUseUnderscoreSetJmp(false);
    124     setUseUnderscoreLongJmp(false);
    125   } else if (Subtarget.isTargetWindowsGNU()) {
     126     // MS runtime is weird: it exports _setjmp, but plain longjmp (no underscore)!
    127     setUseUnderscoreSetJmp(true);
    128     setUseUnderscoreLongJmp(false);
    129   } else {
    130     setUseUnderscoreSetJmp(true);
    131     setUseUnderscoreLongJmp(true);
    132   }
    133 
    134   // Set up the register classes.
    135   addRegisterClass(MVT::i8, &X86::GR8RegClass);
    136   addRegisterClass(MVT::i16, &X86::GR16RegClass);
    137   addRegisterClass(MVT::i32, &X86::GR32RegClass);
    138   if (Subtarget.is64Bit())
    139     addRegisterClass(MVT::i64, &X86::GR64RegClass);
    140 
    141   for (MVT VT : MVT::integer_valuetypes())
    142     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    143 
    144   // We don't accept any truncstore of integer registers.
    145   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
    146   setTruncStoreAction(MVT::i64, MVT::i16, Expand);
    147   setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
    148   setTruncStoreAction(MVT::i32, MVT::i16, Expand);
    149   setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
    150   setTruncStoreAction(MVT::i16, MVT::i8,  Expand);
    151 
    152   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    153 
    154   // SETOEQ and SETUNE require checking two conditions.
    155   setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
    156   setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
    157   setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
    158   setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
    159   setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
    160   setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
    161 
    162   // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
    163   // operation.
    164   setOperationAction(ISD::UINT_TO_FP       , MVT::i1   , Promote);
    165   setOperationAction(ISD::UINT_TO_FP       , MVT::i8   , Promote);
    166   setOperationAction(ISD::UINT_TO_FP       , MVT::i16  , Promote);
    167 
    168   if (Subtarget.is64Bit()) {
    169     if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
    170       // f32/f64 are legal, f80 is custom.
    171       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Custom);
    172     else
    173       setOperationAction(ISD::UINT_TO_FP   , MVT::i32  , Promote);
    174     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
    175   } else if (!Subtarget.useSoftFloat()) {
    176     // We have an algorithm for SSE2->double, and we turn this into a
    177     // 64-bit FILD followed by conditional FADD for other targets.
    178     setOperationAction(ISD::UINT_TO_FP     , MVT::i64  , Custom);
    179     // We have an algorithm for SSE2, and we turn this into a 64-bit
    180     // FILD or VCVTUSI2SS/SD for other targets.
    181     setOperationAction(ISD::UINT_TO_FP     , MVT::i32  , Custom);
    182   }
    183 
    184   // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
    185   // this operation.
    186   setOperationAction(ISD::SINT_TO_FP       , MVT::i1   , Promote);
    187   setOperationAction(ISD::SINT_TO_FP       , MVT::i8   , Promote);
    188 
    189   if (!Subtarget.useSoftFloat()) {
    190     // SSE has no i16 to fp conversion, only i32
    191     if (X86ScalarSSEf32) {
    192       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    193       // f32 and f64 cases are Legal, f80 case is not
    194       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
    195     } else {
    196       setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Custom);
    197       setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Custom);
    198     }
    199   } else {
    200     setOperationAction(ISD::SINT_TO_FP     , MVT::i16  , Promote);
    201     setOperationAction(ISD::SINT_TO_FP     , MVT::i32  , Promote);
    202   }
    203 
    204   // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
    205   // this operation.
    206   setOperationAction(ISD::FP_TO_SINT       , MVT::i1   , Promote);
    207   setOperationAction(ISD::FP_TO_SINT       , MVT::i8   , Promote);
    208 
    209   if (!Subtarget.useSoftFloat()) {
    210     // In 32-bit mode these are custom lowered.  In 64-bit mode F32 and F64
    211     // are Legal, f80 is custom lowered.
    212     setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Custom);
    213     setOperationAction(ISD::SINT_TO_FP     , MVT::i64  , Custom);
    214 
    215     if (X86ScalarSSEf32) {
    216       setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    217       // f32 and f64 cases are Legal, f80 case is not
    218       setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
    219     } else {
    220       setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Custom);
    221       setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Custom);
    222     }
    223   } else {
    224     setOperationAction(ISD::FP_TO_SINT     , MVT::i16  , Promote);
    225     setOperationAction(ISD::FP_TO_SINT     , MVT::i32  , Expand);
    226     setOperationAction(ISD::FP_TO_SINT     , MVT::i64  , Expand);
    227   }
    228 
    229   // Handle FP_TO_UINT by promoting the destination to a larger signed
    230   // conversion.
    231   setOperationAction(ISD::FP_TO_UINT       , MVT::i1   , Promote);
    232   setOperationAction(ISD::FP_TO_UINT       , MVT::i8   , Promote);
    233   setOperationAction(ISD::FP_TO_UINT       , MVT::i16  , Promote);
    234 
    235   if (Subtarget.is64Bit()) {
    236     if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
    237       // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
    238       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
    239       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Custom);
    240     } else {
    241       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Promote);
    242       setOperationAction(ISD::FP_TO_UINT   , MVT::i64  , Expand);
    243     }
    244   } else if (!Subtarget.useSoftFloat()) {
     245     // Since AVX is a superset of SSE3, we only need to check for SSE3 here.
    246     if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
    247       // Expand FP_TO_UINT into a select.
    248       // FIXME: We would like to use a Custom expander here eventually to do
    249       // the optimal thing for SSE vs. the default expansion in the legalizer.
    250       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Expand);
    251     else
    252       // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
    253       // With SSE3 we can use fisttpll to convert to a signed i64; without
    254       // SSE, we're stuck with a fistpll.
    255       setOperationAction(ISD::FP_TO_UINT   , MVT::i32  , Custom);
    256 
    257     setOperationAction(ISD::FP_TO_UINT     , MVT::i64  , Custom);
    258   }
    259 
    260   // TODO: when we have SSE, these could be more efficient, by using movd/movq.
    261   if (!X86ScalarSSEf64) {
    262     setOperationAction(ISD::BITCAST        , MVT::f32  , Expand);
    263     setOperationAction(ISD::BITCAST        , MVT::i32  , Expand);
    264     if (Subtarget.is64Bit()) {
    265       setOperationAction(ISD::BITCAST      , MVT::f64  , Expand);
    266       // Without SSE, i64->f64 goes through memory.
    267       setOperationAction(ISD::BITCAST      , MVT::i64  , Expand);
    268     }
    269   } else if (!Subtarget.is64Bit())
    270     setOperationAction(ISD::BITCAST      , MVT::i64  , Custom);
    271 
    272   // Scalar integer divide and remainder are lowered to use operations that
    273   // produce two results, to match the available instructions. This exposes
    274   // the two-result form to trivial CSE, which is able to combine x/y and x%y
    275   // into a single instruction.
    276   //
    277   // Scalar integer multiply-high is also lowered to use two-result
    278   // operations, to match the available instructions. However, plain multiply
    279   // (low) operations are left as Legal, as there are single-result
    280   // instructions for this in x86. Using the two-result multiply instructions
    281   // when both high and low results are needed must be arranged by dagcombine.
    282   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    283     setOperationAction(ISD::MULHS, VT, Expand);
    284     setOperationAction(ISD::MULHU, VT, Expand);
    285     setOperationAction(ISD::SDIV, VT, Expand);
    286     setOperationAction(ISD::UDIV, VT, Expand);
    287     setOperationAction(ISD::SREM, VT, Expand);
    288     setOperationAction(ISD::UREM, VT, Expand);
    289 
     290     // Add/Sub overflow ops with MVT::Glue are lowered to EFLAGS dependences.
    291     setOperationAction(ISD::ADDC, VT, Custom);
    292     setOperationAction(ISD::ADDE, VT, Custom);
    293     setOperationAction(ISD::SUBC, VT, Custom);
    294     setOperationAction(ISD::SUBE, VT, Custom);
    295   }
    296 
    297   setOperationAction(ISD::BR_JT            , MVT::Other, Expand);
    298   setOperationAction(ISD::BRCOND           , MVT::Other, Custom);
    299   for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
    300                    MVT::i8,  MVT::i16, MVT::i32, MVT::i64 }) {
    301     setOperationAction(ISD::BR_CC,     VT, Expand);
    302     setOperationAction(ISD::SELECT_CC, VT, Expand);
    303   }
    304   if (Subtarget.is64Bit())
    305     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
    306   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16  , Legal);
    307   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8   , Legal);
    308   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1   , Expand);
    309   setOperationAction(ISD::FP_ROUND_INREG   , MVT::f32  , Expand);
    310 
    311   setOperationAction(ISD::FREM             , MVT::f32  , Expand);
    312   setOperationAction(ISD::FREM             , MVT::f64  , Expand);
    313   setOperationAction(ISD::FREM             , MVT::f80  , Expand);
    314   setOperationAction(ISD::FLT_ROUNDS_      , MVT::i32  , Custom);
    315 
     316   // Promote the i8 variants and force them up to i32, which has a shorter
    317   // encoding.
    318   setOperationPromotedToType(ISD::CTTZ           , MVT::i8   , MVT::i32);
    319   setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
    320   if (!Subtarget.hasBMI()) {
    321     setOperationAction(ISD::CTTZ           , MVT::i16  , Custom);
    322     setOperationAction(ISD::CTTZ           , MVT::i32  , Custom);
    323     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16  , Legal);
    324     setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32  , Legal);
    325     if (Subtarget.is64Bit()) {
    326       setOperationAction(ISD::CTTZ         , MVT::i64  , Custom);
    327       setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
    328     }
    329   }
    330 
    331   if (Subtarget.hasLZCNT()) {
    332     // When promoting the i8 variants, force them to i32 for a shorter
    333     // encoding.
    334     setOperationPromotedToType(ISD::CTLZ           , MVT::i8   , MVT::i32);
    335     setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , MVT::i32);
    336   } else {
    337     setOperationAction(ISD::CTLZ           , MVT::i8   , Custom);
    338     setOperationAction(ISD::CTLZ           , MVT::i16  , Custom);
    339     setOperationAction(ISD::CTLZ           , MVT::i32  , Custom);
    340     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8   , Custom);
    341     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16  , Custom);
    342     setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32  , Custom);
    343     if (Subtarget.is64Bit()) {
    344       setOperationAction(ISD::CTLZ         , MVT::i64  , Custom);
    345       setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
    346     }
    347   }
    348 
    349   // Special handling for half-precision floating point conversions.
    350   // If we don't have F16C support, then lower half float conversions
    351   // into library calls.
    352   if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
    353     setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
    354     setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
    355   }
    356 
    357   // There's never any support for operations beyond MVT::f32.
    358   setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
    359   setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
    360   setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
    361   setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
    362 
    363   setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    364   setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    365   setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
    366   setTruncStoreAction(MVT::f32, MVT::f16, Expand);
    367   setTruncStoreAction(MVT::f64, MVT::f16, Expand);
    368   setTruncStoreAction(MVT::f80, MVT::f16, Expand);
    369 
    370   if (Subtarget.hasPOPCNT()) {
    371     setOperationAction(ISD::CTPOP          , MVT::i8   , Promote);
    372   } else {
    373     setOperationAction(ISD::CTPOP          , MVT::i8   , Expand);
    374     setOperationAction(ISD::CTPOP          , MVT::i16  , Expand);
    375     setOperationAction(ISD::CTPOP          , MVT::i32  , Expand);
    376     if (Subtarget.is64Bit())
    377       setOperationAction(ISD::CTPOP        , MVT::i64  , Expand);
    378   }
    379 
    380   setOperationAction(ISD::READCYCLECOUNTER , MVT::i64  , Custom);
    381 
    382   if (!Subtarget.hasMOVBE())
    383     setOperationAction(ISD::BSWAP          , MVT::i16  , Expand);
    384 
    385   // These should be promoted to a larger select which is supported.
    386   setOperationAction(ISD::SELECT          , MVT::i1   , Promote);
    387   // X86 wants to expand cmov itself.
    388   for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
    389     setOperationAction(ISD::SELECT, VT, Custom);
    390     setOperationAction(ISD::SETCC, VT, Custom);
    391   }
    392   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    393     if (VT == MVT::i64 && !Subtarget.is64Bit())
    394       continue;
    395     setOperationAction(ISD::SELECT, VT, Custom);
    396     setOperationAction(ISD::SETCC,  VT, Custom);
    397     setOperationAction(ISD::SETCCE, VT, Custom);
    398   }
    399   setOperationAction(ISD::EH_RETURN       , MVT::Other, Custom);
     400   // NOTE: EH_SJLJ_SETJMP/_LONGJMP are NOT intended to support SjLj exception
     401   // handling; they are a light-weight setjmp/longjmp replacement used for
     402   // continuations, user-level threading, etc. As a result, no other SjLj
     403   // exception interfaces are implemented; please don't build your own
     404   // exception handling on top of them.
    405   // LLVM/Clang supports zero-cost DWARF exception handling.
    406   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    407   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    408   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
    409   if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
    410     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
    411 
    412   // Darwin ABI issue.
    413   for (auto VT : { MVT::i32, MVT::i64 }) {
    414     if (VT == MVT::i64 && !Subtarget.is64Bit())
    415       continue;
    416     setOperationAction(ISD::ConstantPool    , VT, Custom);
    417     setOperationAction(ISD::JumpTable       , VT, Custom);
    418     setOperationAction(ISD::GlobalAddress   , VT, Custom);
    419     setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
    420     setOperationAction(ISD::ExternalSymbol  , VT, Custom);
    421     setOperationAction(ISD::BlockAddress    , VT, Custom);
    422   }
     423   // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
    424   for (auto VT : { MVT::i32, MVT::i64 }) {
    425     if (VT == MVT::i64 && !Subtarget.is64Bit())
    426       continue;
    427     setOperationAction(ISD::SHL_PARTS, VT, Custom);
    428     setOperationAction(ISD::SRA_PARTS, VT, Custom);
    429     setOperationAction(ISD::SRL_PARTS, VT, Custom);
    430   }
    431 
    432   if (Subtarget.hasSSE1())
    433     setOperationAction(ISD::PREFETCH      , MVT::Other, Legal);
    434 
    435   setOperationAction(ISD::ATOMIC_FENCE  , MVT::Other, Custom);
    436 
    437   // Expand certain atomics
    438   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
    439     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
    440     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
    441     setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    442     setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    443     setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    444     setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    445     setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
    446   }
    447 
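           // CMPXCHG16B provides a native 128-bit compare-and-swap; it is only
           // available in 64-bit mode.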
    448   if (Subtarget.hasCmpxchg16b()) {
    449     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
    450   }
    451 
    452   // FIXME - use subtarget debug flags
    453   if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
    454       !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
    455       TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
    456     setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
    457   }
    458 
    459   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
    460   setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
    461 
    462   setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
    463   setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
    464 
    465   setOperationAction(ISD::TRAP, MVT::Other, Legal);
    466   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
    467 
    468   // VASTART needs to be custom lowered to use the VarArgsFrameIndex
    469   setOperationAction(ISD::VASTART           , MVT::Other, Custom);
    470   setOperationAction(ISD::VAEND             , MVT::Other, Expand);
    471   bool Is64Bit = Subtarget.is64Bit();
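           // On 64-bit targets va_list can be a structure (e.g. in the SysV x86-64
           // ABI), so VAARG and VACOPY need custom lowering; the 32-bit va_list is a
           // plain pointer and the default expansion suffices.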
    472   setOperationAction(ISD::VAARG,  MVT::Other, Is64Bit ? Custom : Expand);
    473   setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
    474 
    475   setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
    476   setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
    477 
    478   setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
    479 
    480   // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
    481   setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
    482   setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
    483 
    484   if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
    485     // f32 and f64 use SSE.
    486     // Set up the FP register classes.
    487     addRegisterClass(MVT::f32, &X86::FR32RegClass);
    488     addRegisterClass(MVT::f64, &X86::FR64RegClass);
    489 
    490     for (auto VT : { MVT::f32, MVT::f64 }) {
    491       // Use ANDPD to simulate FABS.
    492       setOperationAction(ISD::FABS, VT, Custom);
    493 
    494       // Use XORP to simulate FNEG.
    495       setOperationAction(ISD::FNEG, VT, Custom);
    496 
    497       // Use ANDPD and ORPD to simulate FCOPYSIGN.
    498       setOperationAction(ISD::FCOPYSIGN, VT, Custom);
    499 
    500       // We don't support sin/cos/fmod
    501       setOperationAction(ISD::FSIN   , VT, Expand);
    502       setOperationAction(ISD::FCOS   , VT, Expand);
    503       setOperationAction(ISD::FSINCOS, VT, Expand);
    504     }
    505 
    506     // Lower this to MOVMSK plus an AND.
    507     setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
    508     setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
    509 
    510     // Expand FP immediates into loads from the stack, except for the special
    511     // cases we handle.
    512     addLegalFPImmediate(APFloat(+0.0)); // xorpd
    513     addLegalFPImmediate(APFloat(+0.0f)); // xorps
    514   } else if (UseX87 && X86ScalarSSEf32) {
    515     // Use SSE for f32, x87 for f64.
    516     // Set up the FP register classes.
    517     addRegisterClass(MVT::f32, &X86::FR32RegClass);
    518     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    519 
    520     // Use ANDPS to simulate FABS.
    521     setOperationAction(ISD::FABS , MVT::f32, Custom);
    522 
    523     // Use XORP to simulate FNEG.
    524     setOperationAction(ISD::FNEG , MVT::f32, Custom);
    525 
    526     setOperationAction(ISD::UNDEF,     MVT::f64, Expand);
    527 
    528     // Use ANDPS and ORPS to simulate FCOPYSIGN.
    529     setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    530     setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
    531 
    532     // We don't support sin/cos/fmod
    533     setOperationAction(ISD::FSIN   , MVT::f32, Expand);
    534     setOperationAction(ISD::FCOS   , MVT::f32, Expand);
    535     setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
    536 
    537     // Special cases we handle for FP constants.
    538     addLegalFPImmediate(APFloat(+0.0f)); // xorps
    539     addLegalFPImmediate(APFloat(+0.0)); // FLD0
    540     addLegalFPImmediate(APFloat(+1.0)); // FLD1
    541     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    542     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    543 
    544     if (!TM.Options.UnsafeFPMath) {
    545       setOperationAction(ISD::FSIN   , MVT::f64, Expand);
    546       setOperationAction(ISD::FCOS   , MVT::f64, Expand);
    547       setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
    548     }
    549   } else if (UseX87) {
    550     // f32 and f64 in x87.
    551     // Set up the FP register classes.
    552     addRegisterClass(MVT::f64, &X86::RFP64RegClass);
    553     addRegisterClass(MVT::f32, &X86::RFP32RegClass);
    554 
    555     for (auto VT : { MVT::f32, MVT::f64 }) {
    556       setOperationAction(ISD::UNDEF,     VT, Expand);
    557       setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    558 
    559       if (!TM.Options.UnsafeFPMath) {
    560         setOperationAction(ISD::FSIN   , VT, Expand);
    561         setOperationAction(ISD::FCOS   , VT, Expand);
    562         setOperationAction(ISD::FSINCOS, VT, Expand);
    563       }
    564     }
    565     addLegalFPImmediate(APFloat(+0.0)); // FLD0
    566     addLegalFPImmediate(APFloat(+1.0)); // FLD1
    567     addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
    568     addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
    569     addLegalFPImmediate(APFloat(+0.0f)); // FLD0
    570     addLegalFPImmediate(APFloat(+1.0f)); // FLD1
    571     addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
    572     addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
    573   }
    574 
    575   // We don't support FMA.
    576   setOperationAction(ISD::FMA, MVT::f64, Expand);
    577   setOperationAction(ISD::FMA, MVT::f32, Expand);
    578 
    579   // Long double always uses X87, except f128 in MMX.
    580   if (UseX87) {
    581     if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
    582       addRegisterClass(MVT::f128, &X86::FR128RegClass);
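               // f128 arithmetic is softened to library calls; only the sign-bit
               // operations below (FABS/FNEG/FCOPYSIGN) get custom lowering.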
    583       ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
    584       setOperationAction(ISD::FABS , MVT::f128, Custom);
    585       setOperationAction(ISD::FNEG , MVT::f128, Custom);
    586       setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
    587     }
    588 
    589     addRegisterClass(MVT::f80, &X86::RFP80RegClass);
    590     setOperationAction(ISD::UNDEF,     MVT::f80, Expand);
    591     setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
    592     {
    593       APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
    594       addLegalFPImmediate(TmpFlt);  // FLD0
    595       TmpFlt.changeSign();
    596       addLegalFPImmediate(TmpFlt);  // FLD0/FCHS
    597 
    598       bool ignored;
    599       APFloat TmpFlt2(+1.0);
    600       TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
    601                       &ignored);
    602       addLegalFPImmediate(TmpFlt2);  // FLD1
    603       TmpFlt2.changeSign();
    604       addLegalFPImmediate(TmpFlt2);  // FLD1/FCHS
    605     }
    606 
    607     if (!TM.Options.UnsafeFPMath) {
    608       setOperationAction(ISD::FSIN   , MVT::f80, Expand);
    609       setOperationAction(ISD::FCOS   , MVT::f80, Expand);
    610       setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
    611     }
    612 
    613     setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
    614     setOperationAction(ISD::FCEIL,  MVT::f80, Expand);
    615     setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
    616     setOperationAction(ISD::FRINT,  MVT::f80, Expand);
    617     setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
    618     setOperationAction(ISD::FMA, MVT::f80, Expand);
    619   }
    620 
    621   // Always use a library call for pow.
    622   setOperationAction(ISD::FPOW             , MVT::f32  , Expand);
    623   setOperationAction(ISD::FPOW             , MVT::f64  , Expand);
    624   setOperationAction(ISD::FPOW             , MVT::f80  , Expand);
    625 
    626   setOperationAction(ISD::FLOG, MVT::f80, Expand);
    627   setOperationAction(ISD::FLOG2, MVT::f80, Expand);
    628   setOperationAction(ISD::FLOG10, MVT::f80, Expand);
    629   setOperationAction(ISD::FEXP, MVT::f80, Expand);
    630   setOperationAction(ISD::FEXP2, MVT::f80, Expand);
    631   setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
    632   setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
    633 
    634   // Some FP actions are always expanded for vector types.
    635   for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
    636                    MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
    637     setOperationAction(ISD::FSIN,      VT, Expand);
    638     setOperationAction(ISD::FSINCOS,   VT, Expand);
    639     setOperationAction(ISD::FCOS,      VT, Expand);
    640     setOperationAction(ISD::FREM,      VT, Expand);
    641     setOperationAction(ISD::FPOWI,     VT, Expand);
    642     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    643     setOperationAction(ISD::FPOW,      VT, Expand);
    644     setOperationAction(ISD::FLOG,      VT, Expand);
    645     setOperationAction(ISD::FLOG2,     VT, Expand);
    646     setOperationAction(ISD::FLOG10,    VT, Expand);
    647     setOperationAction(ISD::FEXP,      VT, Expand);
    648     setOperationAction(ISD::FEXP2,     VT, Expand);
    649   }
    650 
    651   // First set operation action for all vector types to either promote
    652   // (for widening) or expand (for scalarization). Then we will selectively
    653   // turn on ones that can be effectively codegen'd.
    654   for (MVT VT : MVT::vector_valuetypes()) {
    655     setOperationAction(ISD::SDIV, VT, Expand);
    656     setOperationAction(ISD::UDIV, VT, Expand);
    657     setOperationAction(ISD::SREM, VT, Expand);
    658     setOperationAction(ISD::UREM, VT, Expand);
    659     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
    660     setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
    661     setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
    662     setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
    663     setOperationAction(ISD::FMA,  VT, Expand);
    664     setOperationAction(ISD::FFLOOR, VT, Expand);
    665     setOperationAction(ISD::FCEIL, VT, Expand);
    666     setOperationAction(ISD::FTRUNC, VT, Expand);
    667     setOperationAction(ISD::FRINT, VT, Expand);
    668     setOperationAction(ISD::FNEARBYINT, VT, Expand);
    669     setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    670     setOperationAction(ISD::MULHS, VT, Expand);
    671     setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    672     setOperationAction(ISD::MULHU, VT, Expand);
    673     setOperationAction(ISD::SDIVREM, VT, Expand);
    674     setOperationAction(ISD::UDIVREM, VT, Expand);
    675     setOperationAction(ISD::CTPOP, VT, Expand);
    676     setOperationAction(ISD::CTTZ, VT, Expand);
    677     setOperationAction(ISD::CTLZ, VT, Expand);
    678     setOperationAction(ISD::ROTL, VT, Expand);
    679     setOperationAction(ISD::ROTR, VT, Expand);
    680     setOperationAction(ISD::BSWAP, VT, Expand);
    681     setOperationAction(ISD::SETCC, VT, Expand);
    682     setOperationAction(ISD::FP_TO_UINT, VT, Expand);
    683     setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    684     setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    685     setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    686     setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
    687     setOperationAction(ISD::TRUNCATE, VT, Expand);
    688     setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
    689     setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
    690     setOperationAction(ISD::ANY_EXTEND, VT, Expand);
    691     setOperationAction(ISD::SELECT_CC, VT, Expand);
    692     for (MVT InnerVT : MVT::vector_valuetypes()) {
    693       setTruncStoreAction(InnerVT, VT, Expand);
    694 
    695       setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
    696       setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
    697 
    698       // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
     699       // types; we have to deal with them whether we ask for Expansion or not.
    700       // Setting Expand causes its own optimisation problems though, so leave
    701       // them legal.
    702       if (VT.getVectorElementType() == MVT::i1)
    703         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
    704 
    705       // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
    706       // split/scalarized right now.
    707       if (VT.getVectorElementType() == MVT::f16)
    708         setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
    709     }
    710   }
    711 
    712   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
    713   // with -msoft-float, disable use of MMX as well.
    714   if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
    715     addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
     716     // No operations on x86mmx are supported; everything uses intrinsics.
    717   }
    718 
    719   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
    720     addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
    721 
    722     setOperationAction(ISD::FNEG,               MVT::v4f32, Custom);
    723     setOperationAction(ISD::FABS,               MVT::v4f32, Custom);
    724     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4f32, Custom);
    725     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4f32, Custom);
    726     setOperationAction(ISD::VSELECT,            MVT::v4f32, Custom);
    727     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
    728     setOperationAction(ISD::SELECT,             MVT::v4f32, Custom);
    729     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Custom);
    730   }
    731 
    732   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
    733     addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
    734 
    735     // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
    736     // registers cannot be used even for integer operations.
    737     addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
    738     addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
    739     addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
    740     addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
    741 
    742     setOperationAction(ISD::MUL,                MVT::v16i8, Custom);
    743     setOperationAction(ISD::MUL,                MVT::v4i32, Custom);
    744     setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
    745     setOperationAction(ISD::UMUL_LOHI,          MVT::v4i32, Custom);
    746     setOperationAction(ISD::SMUL_LOHI,          MVT::v4i32, Custom);
    747     setOperationAction(ISD::MULHU,              MVT::v16i8, Custom);
    748     setOperationAction(ISD::MULHS,              MVT::v16i8, Custom);
    749     setOperationAction(ISD::MULHU,              MVT::v8i16, Legal);
    750     setOperationAction(ISD::MULHS,              MVT::v8i16, Legal);
    751     setOperationAction(ISD::MUL,                MVT::v8i16, Legal);
    752     setOperationAction(ISD::FNEG,               MVT::v2f64, Custom);
    753     setOperationAction(ISD::FABS,               MVT::v2f64, Custom);
    754 
    755     setOperationAction(ISD::SMAX,               MVT::v8i16, Legal);
    756     setOperationAction(ISD::UMAX,               MVT::v16i8, Legal);
    757     setOperationAction(ISD::SMIN,               MVT::v8i16, Legal);
    758     setOperationAction(ISD::UMIN,               MVT::v16i8, Legal);
    759 
    760     setOperationAction(ISD::SETCC,              MVT::v2i64, Custom);
    761     setOperationAction(ISD::SETCC,              MVT::v16i8, Custom);
    762     setOperationAction(ISD::SETCC,              MVT::v8i16, Custom);
    763     setOperationAction(ISD::SETCC,              MVT::v4i32, Custom);
    764 
    765     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v16i8, Custom);
    766     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v8i16, Custom);
    767     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i16, Custom);
    768     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i32, Custom);
    769     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4f32, Custom);
    770 
    771     setOperationAction(ISD::CTPOP,              MVT::v16i8, Custom);
    772     setOperationAction(ISD::CTPOP,              MVT::v8i16, Custom);
    773     setOperationAction(ISD::CTPOP,              MVT::v4i32, Custom);
    774     setOperationAction(ISD::CTPOP,              MVT::v2i64, Custom);
    775 
    776     setOperationAction(ISD::CTTZ,               MVT::v16i8, Custom);
    777     setOperationAction(ISD::CTTZ,               MVT::v8i16, Custom);
    778     setOperationAction(ISD::CTTZ,               MVT::v4i32, Custom);
    779     // ISD::CTTZ v2i64 - scalarization is faster.
    780 
    781     // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
    782     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    783       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    784       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
    785       setOperationAction(ISD::VSELECT,            VT, Custom);
    786       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    787     }
    788 
    789     // We support custom legalizing of sext and anyext loads for specific
    790     // memory vector types which we can load as a scalar (or sequence of
    791     // scalars) and extend in-register to a legal 128-bit vector type. For sext
    792     // loads these must work with a single scalar load.
    793     for (MVT VT : MVT::integer_vector_valuetypes()) {
    794       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
    795       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
    796       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
    797       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
    798       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
    799       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
    800       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
    801       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
    802       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
    803     }
    804 
    805     for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
    806       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    807       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
    808       setOperationAction(ISD::VSELECT,            VT, Custom);
    809 
    810       if (VT == MVT::v2i64 && !Subtarget.is64Bit())
    811         continue;
    812 
    813       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    814       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    815     }
    816 
    817     // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
    818     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
    819       setOperationPromotedToType(ISD::AND,    VT, MVT::v2i64);
    820       setOperationPromotedToType(ISD::OR,     VT, MVT::v2i64);
    821       setOperationPromotedToType(ISD::XOR,    VT, MVT::v2i64);
    822       setOperationPromotedToType(ISD::LOAD,   VT, MVT::v2i64);
    823       setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
    824     }
    825 
    826     // Custom lower v2i64 and v2f64 selects.
    827     setOperationAction(ISD::SELECT,             MVT::v2f64, Custom);
    828     setOperationAction(ISD::SELECT,             MVT::v2i64, Custom);
    829 
    830     setOperationAction(ISD::FP_TO_SINT,         MVT::v4i32, Legal);
    831     setOperationAction(ISD::SINT_TO_FP,         MVT::v4i32, Legal);
    832 
    833     setOperationAction(ISD::SINT_TO_FP,         MVT::v2i32, Custom);
    834 
    835     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i8,  Custom);
    836     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i16, Custom);
     837     // As there is no 64-bit GPR available, we need to build a special custom
    838     // sequence to convert from v2i32 to v2f32.
    839     if (!Subtarget.is64Bit())
    840       setOperationAction(ISD::UINT_TO_FP,       MVT::v2f32, Custom);
    841 
    842     setOperationAction(ISD::FP_EXTEND,          MVT::v2f32, Custom);
    843     setOperationAction(ISD::FP_ROUND,           MVT::v2f32, Custom);
    844 
    845     for (MVT VT : MVT::fp_vector_valuetypes())
    846       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
    847 
    848     setOperationAction(ISD::BITCAST,            MVT::v2i32, Custom);
    849     setOperationAction(ISD::BITCAST,            MVT::v4i16, Custom);
    850     setOperationAction(ISD::BITCAST,            MVT::v8i8,  Custom);
    851 
    852     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
    853     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
    854     setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
    855 
    856     for (auto VT : { MVT::v8i16, MVT::v16i8 }) {
    857       setOperationAction(ISD::SRL, VT, Custom);
    858       setOperationAction(ISD::SHL, VT, Custom);
    859       setOperationAction(ISD::SRA, VT, Custom);
    860     }
    861 
    862     // In the customized shift lowering, the legal cases in AVX2 will be
    863     // recognized.
    864     for (auto VT : { MVT::v4i32, MVT::v2i64 }) {
    865       setOperationAction(ISD::SRL, VT, Custom);
    866       setOperationAction(ISD::SHL, VT, Custom);
    867       setOperationAction(ISD::SRA, VT, Custom);
    868     }
    869   }
    870 
    871   if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
    872     setOperationAction(ISD::BITREVERSE,         MVT::v16i8, Custom);
    873     setOperationAction(ISD::CTLZ,               MVT::v16i8, Custom);
    874     setOperationAction(ISD::CTLZ,               MVT::v8i16, Custom);
    875     // ISD::CTLZ v4i32 - scalarization is faster.
    876     // ISD::CTLZ v2i64 - scalarization is faster.
    877   }
    878 
    879   if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
    880     for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
    881       setOperationAction(ISD::FFLOOR,           RoundedTy,  Legal);
    882       setOperationAction(ISD::FCEIL,            RoundedTy,  Legal);
    883       setOperationAction(ISD::FTRUNC,           RoundedTy,  Legal);
    884       setOperationAction(ISD::FRINT,            RoundedTy,  Legal);
    885       setOperationAction(ISD::FNEARBYINT,       RoundedTy,  Legal);
    886     }
    887 
    888     setOperationAction(ISD::SMAX,               MVT::v16i8, Legal);
    889     setOperationAction(ISD::SMAX,               MVT::v4i32, Legal);
    890     setOperationAction(ISD::UMAX,               MVT::v8i16, Legal);
    891     setOperationAction(ISD::UMAX,               MVT::v4i32, Legal);
    892     setOperationAction(ISD::SMIN,               MVT::v16i8, Legal);
    893     setOperationAction(ISD::SMIN,               MVT::v4i32, Legal);
    894     setOperationAction(ISD::UMIN,               MVT::v8i16, Legal);
    895     setOperationAction(ISD::UMIN,               MVT::v4i32, Legal);
    896 
    897     // FIXME: Do we need to handle scalar-to-vector here?
    898     setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
    899 
    900     // We directly match byte blends in the backend as they match the VSELECT
    901     // condition form.
    902     setOperationAction(ISD::VSELECT,            MVT::v16i8, Legal);
    903 
    904     // SSE41 brings specific instructions for doing vector sign extend even in
    905     // cases where we don't have SRA.
    906     for (MVT VT : MVT::integer_vector_valuetypes()) {
    907       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
    908       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
    909       setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
    910     }
    911 
    912     // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
    913     setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
    914     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
    915     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
    916     setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    917     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    918     setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
    919 
    920     setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8,  Legal);
    921     setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
    922     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
    923     setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
    924     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
    925     setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
    926 
    927     // i8 vectors are custom because the source register and source
     928     // memory operand types are not the same width.
    929     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i8, Custom);
    930   }
    931 
    932   if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
    933     for (auto VT : { MVT::v16i8, MVT::v8i16,  MVT::v4i32, MVT::v2i64,
    934                      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
    935       setOperationAction(ISD::ROTL, VT, Custom);
    936 
    937     // XOP can efficiently perform BITREVERSE with VPPERM.
    938     for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
    939       setOperationAction(ISD::BITREVERSE, VT, Custom);
    940 
    941     for (auto VT : { MVT::v16i8, MVT::v8i16,  MVT::v4i32, MVT::v2i64,
    942                      MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
    943       setOperationAction(ISD::BITREVERSE, VT, Custom);
    944   }
    945 
    946   if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
    947     bool HasInt256 = Subtarget.hasInt256();
    948 
    949     addRegisterClass(MVT::v32i8,  &X86::VR256RegClass);
    950     addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
    951     addRegisterClass(MVT::v8i32,  &X86::VR256RegClass);
    952     addRegisterClass(MVT::v8f32,  &X86::VR256RegClass);
    953     addRegisterClass(MVT::v4i64,  &X86::VR256RegClass);
    954     addRegisterClass(MVT::v4f64,  &X86::VR256RegClass);
    955 
    956     for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
    957       setOperationAction(ISD::FFLOOR,     VT, Legal);
    958       setOperationAction(ISD::FCEIL,      VT, Legal);
    959       setOperationAction(ISD::FTRUNC,     VT, Legal);
    960       setOperationAction(ISD::FRINT,      VT, Legal);
    961       setOperationAction(ISD::FNEARBYINT, VT, Legal);
    962       setOperationAction(ISD::FNEG,       VT, Custom);
    963       setOperationAction(ISD::FABS,       VT, Custom);
    964     }
    965 
    966     // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
    967     // even though v8i16 is a legal type.
    968     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i16, Promote);
    969     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i16, Promote);
    970     setOperationAction(ISD::FP_TO_SINT,         MVT::v8i32, Legal);
    971 
    972     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i16, Promote);
    973     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i32, Legal);
    974     setOperationAction(ISD::FP_ROUND,           MVT::v4f32, Legal);
    975 
    976     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i8,  Custom);
    977     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i16, Custom);
    978 
    979     for (MVT VT : MVT::fp_vector_valuetypes())
    980       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
    981 
    982     for (auto VT : { MVT::v32i8, MVT::v16i16 }) {
    983       setOperationAction(ISD::SRL, VT, Custom);
    984       setOperationAction(ISD::SHL, VT, Custom);
    985       setOperationAction(ISD::SRA, VT, Custom);
    986     }
    987 
    988     setOperationAction(ISD::SETCC,             MVT::v32i8, Custom);
    989     setOperationAction(ISD::SETCC,             MVT::v16i16, Custom);
    990     setOperationAction(ISD::SETCC,             MVT::v8i32, Custom);
    991     setOperationAction(ISD::SETCC,             MVT::v4i64, Custom);
    992 
    993     setOperationAction(ISD::SELECT,            MVT::v4f64, Custom);
    994     setOperationAction(ISD::SELECT,            MVT::v4i64, Custom);
    995     setOperationAction(ISD::SELECT,            MVT::v8f32, Custom);
    996 
    997     setOperationAction(ISD::SIGN_EXTEND,       MVT::v4i64, Custom);
    998     setOperationAction(ISD::SIGN_EXTEND,       MVT::v8i32, Custom);
    999     setOperationAction(ISD::SIGN_EXTEND,       MVT::v16i16, Custom);
   1000     setOperationAction(ISD::ZERO_EXTEND,       MVT::v4i64, Custom);
   1001     setOperationAction(ISD::ZERO_EXTEND,       MVT::v8i32, Custom);
   1002     setOperationAction(ISD::ZERO_EXTEND,       MVT::v16i16, Custom);
   1003     setOperationAction(ISD::ANY_EXTEND,        MVT::v4i64, Custom);
   1004     setOperationAction(ISD::ANY_EXTEND,        MVT::v8i32, Custom);
   1005     setOperationAction(ISD::ANY_EXTEND,        MVT::v16i16, Custom);
   1006     setOperationAction(ISD::TRUNCATE,          MVT::v16i8, Custom);
   1007     setOperationAction(ISD::TRUNCATE,          MVT::v8i16, Custom);
   1008     setOperationAction(ISD::TRUNCATE,          MVT::v4i32, Custom);
   1009     setOperationAction(ISD::BITREVERSE,        MVT::v32i8, Custom);
   1010 
   1011     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
   1012       setOperationAction(ISD::CTPOP,           VT, Custom);
   1013       setOperationAction(ISD::CTTZ,            VT, Custom);
   1014     }
   1015 
   1016     // ISD::CTLZ v8i32/v4i64 - scalarization is faster without AVX2
   1017     // as we end up splitting the 256-bit vectors.
   1018     for (auto VT : { MVT::v32i8, MVT::v16i16 })
   1019       setOperationAction(ISD::CTLZ,            VT, Custom);
   1020 
   1021     if (HasInt256)
   1022       for (auto VT : { MVT::v8i32, MVT::v4i64 })
   1023         setOperationAction(ISD::CTLZ,          VT, Custom);
   1024 
   1025     if (Subtarget.hasAnyFMA()) {
   1026       for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
   1027                        MVT::v2f64, MVT::v4f64 })
   1028         setOperationAction(ISD::FMA, VT, Legal);
   1029     }
   1030 
   1031     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
   1032       setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
   1033       setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
   1034     }
   1035 
   1036     setOperationAction(ISD::MUL,       MVT::v4i64,  Custom);
   1037     setOperationAction(ISD::MUL,       MVT::v8i32,  HasInt256 ? Legal : Custom);
   1038     setOperationAction(ISD::MUL,       MVT::v16i16, HasInt256 ? Legal : Custom);
   1039     setOperationAction(ISD::MUL,       MVT::v32i8,  Custom);
   1040 
   1041     setOperationAction(ISD::UMUL_LOHI, MVT::v8i32,  Custom);
   1042     setOperationAction(ISD::SMUL_LOHI, MVT::v8i32,  Custom);
   1043 
   1044     setOperationAction(ISD::MULHU,     MVT::v16i16, HasInt256 ? Legal : Custom);
   1045     setOperationAction(ISD::MULHS,     MVT::v16i16, HasInt256 ? Legal : Custom);
   1046     setOperationAction(ISD::MULHU,     MVT::v32i8,  Custom);
   1047     setOperationAction(ISD::MULHS,     MVT::v32i8,  Custom);
   1048 
   1049     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
   1050       setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
   1051       setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
   1052       setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
   1053       setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
   1054     }
   1055 
   1056     if (HasInt256) {
   1057       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64,  Custom);
   1058       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32,  Custom);
   1059       setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
   1060 
   1061       // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
   1062       // when we have a 256bit-wide blend with immediate.
   1063       setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
   1064 
   1065       // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
   1066       setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
   1067       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
   1068       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
   1069       setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
   1070       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
   1071       setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
   1072 
   1073       setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
   1074       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i8,  Legal);
   1075       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i8,  Legal);
   1076       setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32,  MVT::v8i16, Legal);
   1077       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i16, Legal);
   1078       setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64,  MVT::v4i32, Legal);
   1079     }
   1080 
   1081     // In the customized shift lowering, the legal cases in AVX2 will be
   1082     // recognized.
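             // (Illustrative, not exhaustive: with AVX2 a uniform v8i32/v4i64 shift
             // can use VPSLLD/VPSRLD/VPSRAD or VPSLLQ/VPSRLQ directly, and variable
             // per-element shifts map to VPSLLVD/VPSRLVD/VPSRAVD and VPSLLVQ/VPSRLVQ;
             // 64-bit arithmetic right shifts have no AVX2 instruction and stay custom.)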
   1083     for (auto VT : { MVT::v8i32, MVT::v4i64 }) {
   1084       setOperationAction(ISD::SRL, VT, Custom);
   1085       setOperationAction(ISD::SHL, VT, Custom);
   1086       setOperationAction(ISD::SRA, VT, Custom);
   1087     }
   1088 
   1089     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
   1090                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
   1091       setOperationAction(ISD::MLOAD,  VT, Legal);
   1092       setOperationAction(ISD::MSTORE, VT, Legal);
   1093     }
   1094 
   1095     // Extract subvector is special because the value type
   1096     // (result) is 128-bit but the source is 256-bit wide.
   1097     for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
   1098                      MVT::v4f32, MVT::v2f64 }) {
   1099       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
   1100     }
   1101 
   1102     // Custom lower several nodes for 256-bit types.
   1103     for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
   1104                     MVT::v8f32, MVT::v4f64 }) {
   1105       setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
   1106       setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
   1107       setOperationAction(ISD::VSELECT,            VT, Custom);
   1108       setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
   1109       setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
   1110       setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
   1111       setOperationAction(ISD::INSERT_SUBVECTOR,   VT, Custom);
   1112       setOperationAction(ISD::CONCAT_VECTORS,     VT, Custom);
   1113     }
   1114 
   1115     if (HasInt256)
   1116       setOperationAction(ISD::VSELECT,         MVT::v32i8, Legal);
   1117 
    1118     // Promote v32i8, v16i16, v8i32 load, select, and, or, xor to v4i64.
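             // (For instance, an AND of two v8i32 values is bitcast to v4i64,
             // performed as a v4i64 AND, and bitcast back.)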
   1119     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
   1120       setOperationPromotedToType(ISD::AND,    VT, MVT::v4i64);
   1121       setOperationPromotedToType(ISD::OR,     VT, MVT::v4i64);
   1122       setOperationPromotedToType(ISD::XOR,    VT, MVT::v4i64);
   1123       setOperationPromotedToType(ISD::LOAD,   VT, MVT::v4i64);
   1124       setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
   1125     }
   1126   }
   1127 
   1128   if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
   1129     addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
   1130     addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
   1131     addRegisterClass(MVT::v8i64,  &X86::VR512RegClass);
   1132     addRegisterClass(MVT::v8f64,  &X86::VR512RegClass);
   1133 
   1134     addRegisterClass(MVT::i1,     &X86::VK1RegClass);
   1135     addRegisterClass(MVT::v8i1,   &X86::VK8RegClass);
   1136     addRegisterClass(MVT::v16i1,  &X86::VK16RegClass);
   1137 
   1138     for (MVT VT : MVT::fp_vector_valuetypes())
   1139       setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
   1140 
   1141     for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
   1142       setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8,  Legal);
   1143       setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
   1144       setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8,  Legal);
   1145       setLoadExtAction(ExtType, MVT::v8i64,  MVT::v8i8,   Legal);
   1146       setLoadExtAction(ExtType, MVT::v8i64,  MVT::v8i16,  Legal);
   1147       setLoadExtAction(ExtType, MVT::v8i64,  MVT::v8i32,  Legal);
   1148     }
   1149     setOperationAction(ISD::BR_CC,              MVT::i1,    Expand);
   1150     setOperationAction(ISD::SETCC,              MVT::i1,    Custom);
   1151     setOperationAction(ISD::SETCCE,             MVT::i1,    Custom);
   1152     setOperationAction(ISD::SELECT_CC,          MVT::i1,    Expand);
   1153     setOperationAction(ISD::XOR,                MVT::i1,    Legal);
   1154     setOperationAction(ISD::OR,                 MVT::i1,    Legal);
   1155     setOperationAction(ISD::AND,                MVT::i1,    Legal);
   1156     setOperationAction(ISD::SUB,                MVT::i1,    Custom);
   1157     setOperationAction(ISD::ADD,                MVT::i1,    Custom);
   1158     setOperationAction(ISD::MUL,                MVT::i1,    Custom);
   1159 
   1160     for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
   1161                    MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
   1162                    MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
   1163       MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
   1164       setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
   1165       setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
   1166       setLoadExtAction(ISD::EXTLOAD,  VT, MaskVT, Custom);
   1167       setTruncStoreAction(VT, MaskVT, Custom);
   1168     }
   1169 
   1170     for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
   1171       setOperationAction(ISD::FNEG,  VT, Custom);
   1172       setOperationAction(ISD::FABS,  VT, Custom);
   1173       setOperationAction(ISD::FMA,   VT, Legal);
   1174     }
   1175 
   1176     setOperationAction(ISD::FP_TO_SINT,         MVT::v16i32, Legal);
   1177     setOperationAction(ISD::FP_TO_UINT,         MVT::v16i32, Legal);
   1178     setOperationAction(ISD::FP_TO_UINT,         MVT::v8i32, Legal);
   1179     setOperationAction(ISD::FP_TO_UINT,         MVT::v4i32, Legal);
   1180     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i32, Legal);
   1181     setOperationAction(ISD::SINT_TO_FP,         MVT::v8i1,   Custom);
   1182     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i1,  Custom);
   1183     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i8,  Promote);
   1184     setOperationAction(ISD::SINT_TO_FP,         MVT::v16i16, Promote);
   1185     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i32, Legal);
   1186     setOperationAction(ISD::UINT_TO_FP,         MVT::v8i32, Legal);
   1187     setOperationAction(ISD::UINT_TO_FP,         MVT::v4i32, Legal);
   1188     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i8, Custom);
   1189     setOperationAction(ISD::UINT_TO_FP,         MVT::v16i16, Custom);
   1190     setOperationAction(ISD::FP_ROUND,           MVT::v8f32, Legal);
   1191     setOperationAction(ISD::FP_EXTEND,          MVT::v8f32, Legal);
   1192 
   1193     setTruncStoreAction(MVT::v8i64,   MVT::v8i8,   Legal);
   1194     setTruncStoreAction(MVT::v8i64,   MVT::v8i16,  Legal);
   1195     setTruncStoreAction(MVT::v8i64,   MVT::v8i32,  Legal);
   1196     setTruncStoreAction(MVT::v16i32,  MVT::v16i8,  Legal);
   1197     setTruncStoreAction(MVT::v16i32,  MVT::v16i16, Legal);
    1198     if (Subtarget.hasVLX()) {
   1199       setTruncStoreAction(MVT::v4i64, MVT::v4i8,  Legal);
   1200       setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
   1201       setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
   1202       setTruncStoreAction(MVT::v8i32, MVT::v8i8,  Legal);
   1203       setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
   1204 
   1205       setTruncStoreAction(MVT::v2i64, MVT::v2i8,  Legal);
   1206       setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
   1207       setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
   1208       setTruncStoreAction(MVT::v4i32, MVT::v4i8,  Legal);
   1209       setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
   1210     } else {
   1211       setOperationAction(ISD::MLOAD,    MVT::v8i32, Custom);
   1212       setOperationAction(ISD::MLOAD,    MVT::v8f32, Custom);
   1213       setOperationAction(ISD::MSTORE,   MVT::v8i32, Custom);
   1214       setOperationAction(ISD::MSTORE,   MVT::v8f32, Custom);
   1215     }
   1216     setOperationAction(ISD::TRUNCATE,           MVT::i1, Custom);
   1217     setOperationAction(ISD::TRUNCATE,           MVT::v16i8, Custom);
   1218     setOperationAction(ISD::TRUNCATE,           MVT::v8i32, Custom);
   1219     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v8i1,  Custom);
   1220     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v16i1, Custom);
   1221     setOperationAction(ISD::VSELECT,            MVT::v8i1,  Expand);
   1222     setOperationAction(ISD::VSELECT,            MVT::v16i1, Expand);
   1223     if (Subtarget.hasDQI()) {
   1224       setOperationAction(ISD::SINT_TO_FP,       MVT::v8i64, Legal);
   1225       setOperationAction(ISD::UINT_TO_FP,       MVT::v8i64, Legal);
   1226       setOperationAction(ISD::FP_TO_SINT,       MVT::v8i64, Legal);
   1227       setOperationAction(ISD::FP_TO_UINT,       MVT::v8i64, Legal);
   1228       if (Subtarget.hasVLX()) {
   1229         setOperationAction(ISD::SINT_TO_FP,    MVT::v4i64, Legal);
   1230         setOperationAction(ISD::SINT_TO_FP,    MVT::v2i64, Legal);
   1231         setOperationAction(ISD::UINT_TO_FP,    MVT::v4i64, Legal);
   1232         setOperationAction(ISD::UINT_TO_FP,    MVT::v2i64, Legal);
   1233         setOperationAction(ISD::FP_TO_SINT,    MVT::v4i64, Legal);
   1234         setOperationAction(ISD::FP_TO_SINT,    MVT::v2i64, Legal);
   1235         setOperationAction(ISD::FP_TO_UINT,    MVT::v4i64, Legal);
   1236         setOperationAction(ISD::FP_TO_UINT,    MVT::v2i64, Legal);
   1237       }
   1238     }
   1239     if (Subtarget.hasVLX()) {
   1240       setOperationAction(ISD::SINT_TO_FP,       MVT::v8i32, Legal);
   1241       setOperationAction(ISD::UINT_TO_FP,       MVT::v8i32, Legal);
   1242       setOperationAction(ISD::FP_TO_SINT,       MVT::v8i32, Legal);
   1243       setOperationAction(ISD::FP_TO_UINT,       MVT::v8i32, Legal);
   1244       setOperationAction(ISD::SINT_TO_FP,       MVT::v4i32, Legal);
   1245       setOperationAction(ISD::UINT_TO_FP,       MVT::v4i32, Legal);
   1246       setOperationAction(ISD::FP_TO_SINT,       MVT::v4i32, Legal);
   1247       setOperationAction(ISD::FP_TO_UINT,       MVT::v4i32, Legal);
   1248       setOperationAction(ISD::ZERO_EXTEND,      MVT::v4i32, Custom);
   1249       setOperationAction(ISD::ZERO_EXTEND,      MVT::v2i64, Custom);
   1250 
    1251       // FIXME: These instructions are available on SSE/AVX2; add the relevant patterns.
   1252       setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8,  Legal);
   1253       setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
   1254       setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8,  Legal);
   1255       setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
   1256       setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8,  Legal);
   1257       setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
   1258       setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
   1259       setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8,  Legal);
   1260       setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
   1261       setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
   1262     }
   1263 
   1264     setOperationAction(ISD::TRUNCATE,           MVT::v8i1, Custom);
   1265     setOperationAction(ISD::TRUNCATE,           MVT::v16i1, Custom);
   1266     setOperationAction(ISD::TRUNCATE,           MVT::v16i16, Custom);
   1267     setOperationAction(ISD::ZERO_EXTEND,        MVT::v16i32, Custom);
   1268     setOperationAction(ISD::ZERO_EXTEND,        MVT::v8i64, Custom);
   1269     setOperationAction(ISD::ANY_EXTEND,         MVT::v16i32, Custom);
   1270     setOperationAction(ISD::ANY_EXTEND,         MVT::v8i64, Custom);
   1271     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i32, Custom);
   1272     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i64, Custom);
   1273     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i8, Custom);
   1274     setOperationAction(ISD::SIGN_EXTEND,        MVT::v8i16, Custom);
   1275     setOperationAction(ISD::SIGN_EXTEND,        MVT::v16i16, Custom);
   1276     if (Subtarget.hasDQI()) {
   1277       setOperationAction(ISD::SIGN_EXTEND,        MVT::v4i32, Custom);
   1278       setOperationAction(ISD::SIGN_EXTEND,        MVT::v2i64, Custom);
   1279     }
   1280     for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
   1281       setOperationAction(ISD::FFLOOR,     VT, Legal);
   1282       setOperationAction(ISD::FCEIL,      VT, Legal);
   1283       setOperationAction(ISD::FTRUNC,     VT, Legal);
   1284       setOperationAction(ISD::FRINT,      VT, Legal);
   1285       setOperationAction(ISD::FNEARBYINT, VT, Legal);
   1286     }
   1287 
   1288     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8f64,  Custom);
   1289     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i64,  Custom);
   1290     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16f32,  Custom);
   1291     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i32,  Custom);
   1292     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v16i1,   Custom);
   1293 
   1294     setOperationAction(ISD::SETCC,              MVT::v16i1, Custom);
   1295     setOperationAction(ISD::SETCC,              MVT::v8i1, Custom);
   1296 
   1297     setOperationAction(ISD::MUL,              MVT::v8i64, Custom);
   1298 
   1299     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1,  Custom);
   1300     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
   1301     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v16i1, Custom);
   1302     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v16i1, Custom);
   1303     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v8i1, Custom);
   1304     setOperationAction(ISD::BUILD_VECTOR,       MVT::v8i1, Custom);
   1305     setOperationAction(ISD::BUILD_VECTOR,       MVT::v16i1, Custom);
   1306     setOperationAction(ISD::SELECT,             MVT::v8f64, Custom);
   1307     setOperationAction(ISD::SELECT,             MVT::v8i64, Custom);
   1308     setOperationAction(ISD::SELECT,             MVT::v16f32, Custom);
   1309     setOperationAction(ISD::SELECT,             MVT::v16i1, Custom);
   1310     setOperationAction(ISD::SELECT,             MVT::v8i1,  Custom);
   1311 
   1312     setOperationAction(ISD::SMAX,               MVT::v16i32, Legal);
   1313     setOperationAction(ISD::SMAX,               MVT::v8i64, Legal);
   1314     setOperationAction(ISD::UMAX,               MVT::v16i32, Legal);
   1315     setOperationAction(ISD::UMAX,               MVT::v8i64, Legal);
   1316     setOperationAction(ISD::SMIN,               MVT::v16i32, Legal);
   1317     setOperationAction(ISD::SMIN,               MVT::v8i64, Legal);
   1318     setOperationAction(ISD::UMIN,               MVT::v16i32, Legal);
   1319     setOperationAction(ISD::UMIN,               MVT::v8i64, Legal);
   1320 
   1321     setOperationAction(ISD::ADD,                MVT::v8i1,  Expand);
   1322     setOperationAction(ISD::ADD,                MVT::v16i1, Expand);
   1323     setOperationAction(ISD::SUB,                MVT::v8i1,  Expand);
   1324     setOperationAction(ISD::SUB,                MVT::v16i1, Expand);
   1325     setOperationAction(ISD::MUL,                MVT::v8i1,  Expand);
   1326     setOperationAction(ISD::MUL,                MVT::v16i1, Expand);
   1327 
   1328     setOperationAction(ISD::MUL,                MVT::v16i32, Legal);
   1329 
   1330     for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
   1331       setOperationAction(ISD::SRL, VT, Custom);
   1332       setOperationAction(ISD::SHL, VT, Custom);
   1333       setOperationAction(ISD::SRA, VT, Custom);
   1334       setOperationAction(ISD::AND, VT, Legal);
   1335       setOperationAction(ISD::OR,  VT, Legal);
   1336       setOperationAction(ISD::XOR, VT, Legal);
   1337       setOperationAction(ISD::CTPOP, VT, Custom);
   1338       setOperationAction(ISD::CTTZ, VT, Custom);
   1339     }
   1340 
   1341     if (Subtarget.hasCDI()) {
   1342       setOperationAction(ISD::CTLZ,             MVT::v8i64,  Legal);
   1343       setOperationAction(ISD::CTLZ,             MVT::v16i32, Legal);
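               // (These map directly to the AVX-512 CDI VPLZCNTD/VPLZCNTQ
               // instructions.)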
   1344 
   1345       setOperationAction(ISD::CTLZ,             MVT::v8i16,  Custom);
   1346       setOperationAction(ISD::CTLZ,             MVT::v16i8,  Custom);
   1347       setOperationAction(ISD::CTLZ,             MVT::v16i16, Custom);
   1348       setOperationAction(ISD::CTLZ,             MVT::v32i8,  Custom);
   1349 
   1350       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i64,  Custom);
   1351       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v16i32, Custom);
   1352 
   1353       if (Subtarget.hasVLX()) {
   1354         setOperationAction(ISD::CTLZ,             MVT::v4i64, Legal);
   1355         setOperationAction(ISD::CTLZ,             MVT::v8i32, Legal);
   1356         setOperationAction(ISD::CTLZ,             MVT::v2i64, Legal);
   1357         setOperationAction(ISD::CTLZ,             MVT::v4i32, Legal);
   1358       } else {
   1359         setOperationAction(ISD::CTLZ,             MVT::v4i64, Custom);
   1360         setOperationAction(ISD::CTLZ,             MVT::v8i32, Custom);
   1361         setOperationAction(ISD::CTLZ,             MVT::v2i64, Custom);
   1362         setOperationAction(ISD::CTLZ,             MVT::v4i32, Custom);
   1363       }
   1364 
   1365       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i64, Custom);
   1366       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v8i32, Custom);
   1367       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v2i64, Custom);
   1368       setOperationAction(ISD::CTTZ_ZERO_UNDEF,  MVT::v4i32, Custom);
   1369     } // Subtarget.hasCDI()
   1370 
   1371     if (Subtarget.hasDQI()) {
   1372       if (Subtarget.hasVLX()) {
   1373         setOperationAction(ISD::MUL,             MVT::v2i64, Legal);
   1374         setOperationAction(ISD::MUL,             MVT::v4i64, Legal);
   1375       }
   1376       setOperationAction(ISD::MUL,             MVT::v8i64, Legal);
   1377     }
   1378     // Custom lower several nodes.
   1379     for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
   1380                      MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
   1381       setOperationAction(ISD::MGATHER,  VT, Custom);
   1382       setOperationAction(ISD::MSCATTER, VT, Custom);
   1383     }
   1384     // Extract subvector is special because the value type
   1385     // (result) is 256-bit but the source is 512-bit wide.
   1386     // 128-bit was made Custom under AVX1.
   1387     for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
   1388                      MVT::v8f32, MVT::v4f64 })
   1389       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
   1390     for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
   1391                      MVT::v16i1, MVT::v32i1, MVT::v64i1 })
   1392       setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
   1393 
   1394     for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
   1395       setOperationAction(ISD::VECTOR_SHUFFLE,      VT, Custom);
   1396       setOperationAction(ISD::INSERT_VECTOR_ELT,   VT, Custom);
   1397       setOperationAction(ISD::BUILD_VECTOR,        VT, Custom);
   1398       setOperationAction(ISD::VSELECT,             VT, Legal);
   1399       setOperationAction(ISD::EXTRACT_VECTOR_ELT,  VT, Custom);
   1400       setOperationAction(ISD::SCALAR_TO_VECTOR,    VT, Custom);
   1401       setOperationAction(ISD::INSERT_SUBVECTOR,    VT, Custom);
   1402       setOperationAction(ISD::MLOAD,               VT, Legal);
   1403       setOperationAction(ISD::MSTORE,              VT, Legal);
   1404       setOperationAction(ISD::MGATHER,             VT, Legal);
   1405       setOperationAction(ISD::MSCATTER,            VT, Custom);
   1406     }
   1407     for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
   1408       setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
   1409     }
    1410   } // has AVX-512
   1411 
   1412   if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
   1413     addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
   1414     addRegisterClass(MVT::v64i8,  &X86::VR512RegClass);
   1415 
   1416     addRegisterClass(MVT::v32i1,  &X86::VK32RegClass);
   1417     addRegisterClass(MVT::v64i1,  &X86::VK64RegClass);
   1418 
   1419     setOperationAction(ISD::ADD,                MVT::v32i1, Expand);
   1420     setOperationAction(ISD::ADD,                MVT::v64i1, Expand);
   1421     setOperationAction(ISD::SUB,                MVT::v32i1, Expand);
   1422     setOperationAction(ISD::SUB,                MVT::v64i1, Expand);
   1423     setOperationAction(ISD::MUL,                MVT::v32i1, Expand);
   1424     setOperationAction(ISD::MUL,                MVT::v64i1, Expand);
   1425 
   1426     setOperationAction(ISD::SETCC,              MVT::v32i1, Custom);
   1427     setOperationAction(ISD::SETCC,              MVT::v64i1, Custom);
   1428     setOperationAction(ISD::MUL,                MVT::v32i16, Legal);
   1429     setOperationAction(ISD::MUL,                MVT::v64i8, Custom);
   1430     setOperationAction(ISD::MULHS,              MVT::v32i16, Legal);
   1431     setOperationAction(ISD::MULHU,              MVT::v32i16, Legal);
   1432     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i1, Custom);
   1433     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i1, Custom);
   1434     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v32i16, Custom);
   1435     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v64i8, Custom);
   1436     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i1, Custom);
   1437     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i1, Custom);
   1438     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v32i16, Custom);
   1439     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v64i8, Custom);
   1440     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
   1441     setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
   1442     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v32i16, Custom);
   1443     setOperationAction(ISD::SCALAR_TO_VECTOR,   MVT::v64i8, Custom);
   1444     setOperationAction(ISD::SELECT,             MVT::v32i1, Custom);
   1445     setOperationAction(ISD::SELECT,             MVT::v64i1, Custom);
   1446     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i8, Custom);
   1447     setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i8, Custom);
   1448     setOperationAction(ISD::SIGN_EXTEND,        MVT::v32i16, Custom);
   1449     setOperationAction(ISD::ZERO_EXTEND,        MVT::v32i16, Custom);
   1450     setOperationAction(ISD::ANY_EXTEND,         MVT::v32i16, Custom);
   1451     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v32i16, Custom);
   1452     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v64i8, Custom);
   1453     setOperationAction(ISD::SIGN_EXTEND,        MVT::v64i8, Custom);
   1454     setOperationAction(ISD::ZERO_EXTEND,        MVT::v64i8, Custom);
   1455     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i1, Custom);
   1456     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i1, Custom);
   1457     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v32i16, Custom);
   1458     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v64i8, Custom);
   1459     setOperationAction(ISD::VSELECT,            MVT::v32i16, Legal);
   1460     setOperationAction(ISD::VSELECT,            MVT::v64i8, Legal);
   1461     setOperationAction(ISD::TRUNCATE,           MVT::v32i1, Custom);
   1462     setOperationAction(ISD::TRUNCATE,           MVT::v64i1, Custom);
   1463     setOperationAction(ISD::TRUNCATE,           MVT::v32i8, Custom);
   1464     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v32i1, Custom);
   1465     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v64i1, Custom);
   1466     setOperationAction(ISD::BUILD_VECTOR,       MVT::v32i1, Custom);
   1467     setOperationAction(ISD::BUILD_VECTOR,       MVT::v64i1, Custom);
   1468     setOperationAction(ISD::VSELECT,            MVT::v32i1, Expand);
   1469     setOperationAction(ISD::VSELECT,            MVT::v64i1, Expand);
   1470     setOperationAction(ISD::BITREVERSE,         MVT::v64i8, Custom);
   1471 
   1472     setOperationAction(ISD::SMAX,               MVT::v64i8, Legal);
   1473     setOperationAction(ISD::SMAX,               MVT::v32i16, Legal);
   1474     setOperationAction(ISD::UMAX,               MVT::v64i8, Legal);
   1475     setOperationAction(ISD::UMAX,               MVT::v32i16, Legal);
   1476     setOperationAction(ISD::SMIN,               MVT::v64i8, Legal);
   1477     setOperationAction(ISD::SMIN,               MVT::v32i16, Legal);
   1478     setOperationAction(ISD::UMIN,               MVT::v64i8, Legal);
   1479     setOperationAction(ISD::UMIN,               MVT::v32i16, Legal);
   1480 
   1481     setTruncStoreAction(MVT::v32i16,  MVT::v32i8, Legal);
   1482     setTruncStoreAction(MVT::v16i16,  MVT::v16i8, Legal);
   1483     if (Subtarget.hasVLX())
   1484       setTruncStoreAction(MVT::v8i16,   MVT::v8i8,  Legal);
   1485 
   1486     LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
   1487     for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
   1488       setOperationAction(ISD::MLOAD,               VT, Action);
   1489       setOperationAction(ISD::MSTORE,              VT, Action);
   1490     }
   1491 
   1492     if (Subtarget.hasCDI()) {
   1493       setOperationAction(ISD::CTLZ,            MVT::v32i16, Custom);
   1494       setOperationAction(ISD::CTLZ,            MVT::v64i8,  Custom);
   1495     }
   1496 
   1497     for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
   1498       setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
   1499       setOperationAction(ISD::VSELECT,      VT, Legal);
   1500       setOperationAction(ISD::SRL,          VT, Custom);
   1501       setOperationAction(ISD::SHL,          VT, Custom);
   1502       setOperationAction(ISD::SRA,          VT, Custom);
   1503       setOperationAction(ISD::MLOAD,        VT, Legal);
   1504       setOperationAction(ISD::MSTORE,       VT, Legal);
   1505       setOperationAction(ISD::CTPOP,        VT, Custom);
   1506       setOperationAction(ISD::CTTZ,         VT, Custom);
   1507 
   1508       setOperationPromotedToType(ISD::AND,  VT, MVT::v8i64);
   1509       setOperationPromotedToType(ISD::OR,   VT, MVT::v8i64);
   1510       setOperationPromotedToType(ISD::XOR,  VT, MVT::v8i64);
   1511     }
   1512 
   1513     for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
   1514       setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
   1515       if (Subtarget.hasVLX()) {
    1516         // FIXME: These instructions are available on SSE/AVX2; add the relevant patterns.
   1517         setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
   1518         setLoadExtAction(ExtType, MVT::v8i16,  MVT::v8i8,  Legal);
   1519       }
   1520     }
   1521   }
   1522 
   1523   if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
   1524     addRegisterClass(MVT::v4i1,   &X86::VK4RegClass);
   1525     addRegisterClass(MVT::v2i1,   &X86::VK2RegClass);
   1526 
   1527     setOperationAction(ISD::ADD,                MVT::v2i1, Expand);
   1528     setOperationAction(ISD::ADD,                MVT::v4i1, Expand);
   1529     setOperationAction(ISD::SUB,                MVT::v2i1, Expand);
   1530     setOperationAction(ISD::SUB,                MVT::v4i1, Expand);
   1531     setOperationAction(ISD::MUL,                MVT::v2i1, Expand);
   1532     setOperationAction(ISD::MUL,                MVT::v4i1, Expand);
   1533 
   1534     setOperationAction(ISD::TRUNCATE,           MVT::v2i1, Custom);
   1535     setOperationAction(ISD::TRUNCATE,           MVT::v4i1, Custom);
   1536     setOperationAction(ISD::SETCC,              MVT::v4i1, Custom);
   1537     setOperationAction(ISD::SETCC,              MVT::v2i1, Custom);
   1538     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v4i1, Custom);
   1539     setOperationAction(ISD::CONCAT_VECTORS,     MVT::v8i1, Custom);
   1540     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v8i1, Custom);
   1541     setOperationAction(ISD::INSERT_SUBVECTOR,   MVT::v4i1, Custom);
   1542     setOperationAction(ISD::SELECT,             MVT::v4i1, Custom);
   1543     setOperationAction(ISD::SELECT,             MVT::v2i1, Custom);
   1544     setOperationAction(ISD::BUILD_VECTOR,       MVT::v4i1, Custom);
   1545     setOperationAction(ISD::BUILD_VECTOR,       MVT::v2i1, Custom);
   1546     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v2i1, Custom);
   1547     setOperationAction(ISD::VECTOR_SHUFFLE,     MVT::v4i1, Custom);
   1548     setOperationAction(ISD::VSELECT,            MVT::v2i1, Expand);
   1549     setOperationAction(ISD::VSELECT,            MVT::v4i1, Expand);
   1550 
   1551     for (auto VT : { MVT::v4i32, MVT::v8i32 }) {
   1552       setOperationAction(ISD::AND, VT, Legal);
   1553       setOperationAction(ISD::OR,  VT, Legal);
   1554       setOperationAction(ISD::XOR, VT, Legal);
   1555     }
   1556 
   1557     for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
   1558       setOperationAction(ISD::SMAX, VT, Legal);
   1559       setOperationAction(ISD::UMAX, VT, Legal);
   1560       setOperationAction(ISD::SMIN, VT, Legal);
   1561       setOperationAction(ISD::UMIN, VT, Legal);
   1562     }
   1563   }
   1564 
   1565   // We want to custom lower some of our intrinsics.
   1566   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   1567   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   1568   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   1569   if (!Subtarget.is64Bit()) {
   1570     setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
   1571     setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
   1572   }
   1573 
   1574   // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
   1575   // handle type legalization for these operations here.
   1576   //
   1577   // FIXME: We really should do custom legalization for addition and
   1578   // subtraction on x86-32 once PR3203 is fixed.  We really can't do much better
   1579   // than generic legalization for 64-bit multiplication-with-overflow, though.
   1580   for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
   1581     if (VT == MVT::i64 && !Subtarget.is64Bit())
   1582       continue;
   1583     // Add/Sub/Mul with overflow operations are custom lowered.
   1584     setOperationAction(ISD::SADDO, VT, Custom);
   1585     setOperationAction(ISD::UADDO, VT, Custom);
   1586     setOperationAction(ISD::SSUBO, VT, Custom);
   1587     setOperationAction(ISD::USUBO, VT, Custom);
   1588     setOperationAction(ISD::SMULO, VT, Custom);
   1589     setOperationAction(ISD::UMULO, VT, Custom);
   1590   }
   1591 
   1592   if (!Subtarget.is64Bit()) {
    1593     // These libcalls are not available in 32-bit mode.
   1594     setLibcallName(RTLIB::SHL_I128, nullptr);
   1595     setLibcallName(RTLIB::SRL_I128, nullptr);
   1596     setLibcallName(RTLIB::SRA_I128, nullptr);
   1597   }
   1598 
   1599   // Combine sin / cos into one node or libcall if possible.
   1600   if (Subtarget.hasSinCos()) {
   1601     setLibcallName(RTLIB::SINCOS_F32, "sincosf");
   1602     setLibcallName(RTLIB::SINCOS_F64, "sincos");
   1603     if (Subtarget.isTargetDarwin()) {
   1604       // For MacOSX, we don't want the normal expansion of a libcall to sincos.
   1605       // We want to issue a libcall to __sincos_stret to avoid memory traffic.
   1606       setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
   1607       setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
   1608     }
   1609   }
   1610 
   1611   if (Subtarget.isTargetWin64()) {
   1612     setOperationAction(ISD::SDIV, MVT::i128, Custom);
   1613     setOperationAction(ISD::UDIV, MVT::i128, Custom);
   1614     setOperationAction(ISD::SREM, MVT::i128, Custom);
   1615     setOperationAction(ISD::UREM, MVT::i128, Custom);
   1616     setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
   1617     setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
   1618   }
   1619 
    1620   // On 32-bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)` is.
    1621   // We should promote the value to 64 bits to solve this. This is what the
    1622   // CRT headers do - `fmodf` is an inline header function that casts to f64
    1623   // and calls `fmod`.
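           // (Roughly: fmodf(x, y) becomes (float)fmod((double)x, (double)y).)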
   1624   if (Subtarget.is32Bit() && Subtarget.isTargetKnownWindowsMSVC())
   1625     for (ISD::NodeType Op :
   1626          {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
   1627           ISD::FLOG10, ISD::FPOW, ISD::FSIN})
   1628       if (isOperationExpand(Op, MVT::f32))
   1629         setOperationAction(Op, MVT::f32, Promote);
   1630 
   1631   // We have target-specific dag combine patterns for the following nodes:
   1632   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   1633   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   1634   setTargetDAGCombine(ISD::BITCAST);
   1635   setTargetDAGCombine(ISD::VSELECT);
   1636   setTargetDAGCombine(ISD::SELECT);
   1637   setTargetDAGCombine(ISD::SHL);
   1638   setTargetDAGCombine(ISD::SRA);
   1639   setTargetDAGCombine(ISD::SRL);
   1640   setTargetDAGCombine(ISD::OR);
   1641   setTargetDAGCombine(ISD::AND);
   1642   setTargetDAGCombine(ISD::ADD);
   1643   setTargetDAGCombine(ISD::FADD);
   1644   setTargetDAGCombine(ISD::FSUB);
   1645   setTargetDAGCombine(ISD::FNEG);
   1646   setTargetDAGCombine(ISD::FMA);
   1647   setTargetDAGCombine(ISD::FMINNUM);
   1648   setTargetDAGCombine(ISD::FMAXNUM);
   1649   setTargetDAGCombine(ISD::SUB);
   1650   setTargetDAGCombine(ISD::LOAD);
   1651   setTargetDAGCombine(ISD::MLOAD);
   1652   setTargetDAGCombine(ISD::STORE);
   1653   setTargetDAGCombine(ISD::MSTORE);
   1654   setTargetDAGCombine(ISD::TRUNCATE);
   1655   setTargetDAGCombine(ISD::ZERO_EXTEND);
   1656   setTargetDAGCombine(ISD::ANY_EXTEND);
   1657   setTargetDAGCombine(ISD::SIGN_EXTEND);
   1658   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
   1659   setTargetDAGCombine(ISD::SINT_TO_FP);
   1660   setTargetDAGCombine(ISD::UINT_TO_FP);
   1661   setTargetDAGCombine(ISD::SETCC);
   1662   setTargetDAGCombine(ISD::MUL);
   1663   setTargetDAGCombine(ISD::XOR);
   1664   setTargetDAGCombine(ISD::MSCATTER);
   1665   setTargetDAGCombine(ISD::MGATHER);
   1666 
   1667   computeRegisterProperties(Subtarget.getRegisterInfo());
   1668 
   1669   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
   1670   MaxStoresPerMemsetOptSize = 8;
   1671   MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
   1672   MaxStoresPerMemcpyOptSize = 4;
   1673   MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
   1674   MaxStoresPerMemmoveOptSize = 4;
   1675   setPrefLoopAlignment(4); // 2^4 bytes.
   1676 
   1677   // An out-of-order CPU can speculatively execute past a predictable branch,
   1678   // but a conditional move could be stalled by an expensive earlier operation.
   1679   PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
   1680   EnableExtLdPromotion = true;
   1681   setPrefFunctionAlignment(4); // 2^4 bytes.
   1682 
   1683   verifyIntrinsicTables();
   1684 }
   1685 
   1686 // This has so far only been implemented for 64-bit MachO.
   1687 bool X86TargetLowering::useLoadStackGuardNode() const {
   1688   return Subtarget.isTargetMachO() && Subtarget.is64Bit();
   1689 }
   1690 
   1691 TargetLoweringBase::LegalizeTypeAction
   1692 X86TargetLowering::getPreferredVectorAction(EVT VT) const {
   1693   if (ExperimentalVectorWideningLegalization &&
   1694       VT.getVectorNumElements() != 1 &&
   1695       VT.getVectorElementType().getSimpleVT() != MVT::i1)
   1696     return TypeWidenVector;
   1697 
   1698   return TargetLoweringBase::getPreferredVectorAction(VT);
   1699 }
   1700 
   1701 EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
   1702                                           LLVMContext& Context,
   1703                                           EVT VT) const {
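           // A few illustrative results (not exhaustive): scalar compares yield i1
           // with AVX-512 and i8 otherwise; a v16f32 compare yields v16i1 on
           // AVX-512; with VLX a v4i32 compare yields v4i1; without those features
           // a v4i32 compare yields v4i32.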
   1704   if (!VT.isVector())
   1705     return Subtarget.hasAVX512() ? MVT::i1: MVT::i8;
   1706 
   1707   if (VT.isSimple()) {
   1708     MVT VVT = VT.getSimpleVT();
   1709     const unsigned NumElts = VVT.getVectorNumElements();
   1710     MVT EltVT = VVT.getVectorElementType();
   1711     if (VVT.is512BitVector()) {
   1712       if (Subtarget.hasAVX512())
   1713         if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
   1714             EltVT == MVT::f32 || EltVT == MVT::f64)
   1715           switch(NumElts) {
   1716           case  8: return MVT::v8i1;
   1717           case 16: return MVT::v16i1;
   1718         }
   1719       if (Subtarget.hasBWI())
   1720         if (EltVT == MVT::i8 || EltVT == MVT::i16)
   1721           switch(NumElts) {
   1722           case 32: return MVT::v32i1;
   1723           case 64: return MVT::v64i1;
   1724         }
   1725     }
   1726 
   1727     if (Subtarget.hasBWI() && Subtarget.hasVLX())
   1728       return MVT::getVectorVT(MVT::i1, NumElts);
   1729 
   1730     if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) {
   1731       EVT LegalVT = getTypeToTransformTo(Context, VT);
   1732       EltVT = LegalVT.getVectorElementType().getSimpleVT();
   1733     }
   1734 
   1735     if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32)
   1736       switch(NumElts) {
   1737       case 2: return MVT::v2i1;
   1738       case 4: return MVT::v4i1;
   1739       case 8: return MVT::v8i1;
   1740       }
   1741   }
   1742 
   1743   return VT.changeVectorElementTypeToInteger();
   1744 }
   1745 
   1746 /// Helper for getByValTypeAlignment to determine
   1747 /// the desired ByVal argument alignment.
   1748 static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
   1749   if (MaxAlign == 16)
   1750     return;
   1751   if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
   1752     if (VTy->getBitWidth() == 128)
   1753       MaxAlign = 16;
   1754   } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
   1755     unsigned EltAlign = 0;
   1756     getMaxByValAlign(ATy->getElementType(), EltAlign);
   1757     if (EltAlign > MaxAlign)
   1758       MaxAlign = EltAlign;
   1759   } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
   1760     for (auto *EltTy : STy->elements()) {
   1761       unsigned EltAlign = 0;
   1762       getMaxByValAlign(EltTy, EltAlign);
   1763       if (EltAlign > MaxAlign)
   1764         MaxAlign = EltAlign;
   1765       if (MaxAlign == 16)
   1766         break;
   1767     }
   1768   }
   1769 }
   1770 
   1771 /// Return the desired alignment for ByVal aggregate
   1772 /// function arguments in the caller parameter area. For X86, aggregates
   1773 /// that contain SSE vectors are placed at 16-byte boundaries while the rest
   1774 /// are at 4-byte boundaries.
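         /// For example, on 32-bit x86 with SSE a struct containing a <4 x float>
         /// field is aligned to 16 bytes, while a struct of plain ints keeps the
         /// default 4-byte alignment.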
   1775 unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
   1776                                                   const DataLayout &DL) const {
   1777   if (Subtarget.is64Bit()) {
   1778     // Max of 8 and alignment of type.
   1779     unsigned TyAlign = DL.getABITypeAlignment(Ty);
   1780     if (TyAlign > 8)
   1781       return TyAlign;
   1782     return 8;
   1783   }
   1784 
   1785   unsigned Align = 4;
   1786   if (Subtarget.hasSSE1())
   1787     getMaxByValAlign(Ty, Align);
   1788   return Align;
   1789 }
   1790 
    1791 /// Returns the target-specific optimal type for load and store operations
    1792 /// as a result of memset, memcpy, and memmove lowering.
    1793 /// If DstAlign is zero, the destination can satisfy any alignment
    1794 /// constraint. Similarly, if SrcAlign is zero, there is no need to check it
    1795 /// against an alignment requirement, probably because the source does not
    1796 /// need to be loaded.
    1797 /// If 'IsMemset' is true, we are expanding a memset. If 'ZeroMemset' is
    1798 /// true, it is a memset of zero. 'MemcpyStrSrc' indicates that the memcpy
    1799 /// source is constant, so it does not need to be loaded.
    1800 /// Returns EVT::Other if the type should be determined using generic
    1801 /// target-independent logic.
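         /// For example, a copy of 32 or more bytes on an AVX subtarget with fast
         /// (or sufficiently aligned) 16-byte accesses is given type v32i8 here,
         /// and an 8-byte memset of zero on a 32-bit SSE2 target is given f64
         /// (assuming the function is not marked NoImplicitFloat).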
   1802 EVT
   1803 X86TargetLowering::getOptimalMemOpType(uint64_t Size,
   1804                                        unsigned DstAlign, unsigned SrcAlign,
   1805                                        bool IsMemset, bool ZeroMemset,
   1806                                        bool MemcpyStrSrc,
   1807                                        MachineFunction &MF) const {
   1808   const Function *F = MF.getFunction();
   1809   if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
   1810     if (Size >= 16 &&
   1811         (!Subtarget.isUnalignedMem16Slow() ||
   1812          ((DstAlign == 0 || DstAlign >= 16) &&
   1813           (SrcAlign == 0 || SrcAlign >= 16)))) {
   1814       // FIXME: Check if unaligned 32-byte accesses are slow.
   1815       if (Size >= 32 && Subtarget.hasAVX()) {
   1816         // Although this isn't a well-supported type for AVX1, we'll let
   1817         // legalization and shuffle lowering produce the optimal codegen. If we
   1818         // choose an optimal type with a vector element larger than a byte,
   1819         // getMemsetStores() may create an intermediate splat (using an integer
   1820         // multiply) before we splat as a vector.
   1821         return MVT::v32i8;
   1822       }
   1823       if (Subtarget.hasSSE2())
   1824         return MVT::v16i8;
   1825       // TODO: Can SSE1 handle a byte vector?
   1826       if (Subtarget.hasSSE1())
   1827         return MVT::v4f32;
   1828     } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
   1829                !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
    1830       // Do not use f64 to lower memcpy if the source is a string constant. It's
   1831       // better to use i32 to avoid the loads.
   1832       // Also, do not use f64 to lower memset unless this is a memset of zeros.
   1833       // The gymnastics of splatting a byte value into an XMM register and then
   1834       // only using 8-byte stores (because this is a CPU with slow unaligned
   1835       // 16-byte accesses) makes that a loser.
   1836       return MVT::f64;
   1837     }
   1838   }
   1839   // This is a compromise. If we reach here, unaligned accesses may be slow on
   1840   // this target. However, creating smaller, aligned accesses could be even
   1841   // slower and would certainly be a lot more code.
   1842   if (Subtarget.is64Bit() && Size >= 8)
   1843     return MVT::i64;
   1844   return MVT::i32;
   1845 }
   1846 
   1847 bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
   1848   if (VT == MVT::f32)
   1849     return X86ScalarSSEf32;
   1850   else if (VT == MVT::f64)
   1851     return X86ScalarSSEf64;
   1852   return true;
   1853 }
   1854 
   1855 bool
   1856 X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
   1857                                                   unsigned,
   1858                                                   unsigned,
   1859                                                   bool *Fast) const {
   1860   if (Fast) {
   1861     switch (VT.getSizeInBits()) {
   1862     default:
   1863       // 8-byte and under are always assumed to be fast.
   1864       *Fast = true;
   1865       break;
   1866     case 128:
   1867       *Fast = !Subtarget.isUnalignedMem16Slow();
   1868       break;
   1869     case 256:
   1870       *Fast = !Subtarget.isUnalignedMem32Slow();
   1871       break;
   1872     // TODO: What about AVX-512 (512-bit) accesses?
   1873     }
   1874   }
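           // (Example: an unaligned 256-bit access on a subtarget where
           // isUnalignedMem32Slow() holds is reported with *Fast = false, but is
           // still allowed below.)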
   1875   // Misaligned accesses of any size are always allowed.
   1876   return true;
   1877 }
   1878 
   1879 /// Return the entry encoding for a jump table in the
   1880 /// current function.  The returned value is a member of the
   1881 /// MachineJumpTableInfo::JTEntryKind enum.
   1882 unsigned X86TargetLowering::getJumpTableEncoding() const {
   1883   // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
   1884   // symbol.
   1885   if (isPositionIndependent() && Subtarget.isPICStyleGOT())
   1886     return MachineJumpTableInfo::EK_Custom32;
   1887 
   1888   // Otherwise, use the normal jump table encoding heuristics.
   1889   return TargetLowering::getJumpTableEncoding();
   1890 }
   1891 
   1892 bool X86TargetLowering::useSoftFloat() const {
   1893   return Subtarget.useSoftFloat();
   1894 }
   1895 
   1896 const MCExpr *
   1897 X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
   1898                                              const MachineBasicBlock *MBB,
   1899                                              unsigned uid,MCContext &Ctx) const{
   1900   assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
   1901   // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
   1902   // entries.
   1903   return MCSymbolRefExpr::create(MBB->getSymbol(),
   1904                                  MCSymbolRefExpr::VK_GOTOFF, Ctx);
   1905 }
   1906 
   1907 /// Returns relocation base for the given PIC jumptable.
   1908 SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
   1909                                                     SelectionDAG &DAG) const {
   1910   if (!Subtarget.is64Bit())
    1911     // This node doesn't have an SDLoc associated with it, but it is not
    1912     // really the same as a Register.
   1913     return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
   1914                        getPointerTy(DAG.getDataLayout()));
   1915   return Table;
   1916 }
   1917 
   1918 /// This returns the relocation base for the given PIC jumptable,
   1919 /// the same as getPICJumpTableRelocBase, but as an MCExpr.
   1920 const MCExpr *X86TargetLowering::
   1921 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
   1922                              MCContext &Ctx) const {
   1923   // X86-64 uses RIP relative addressing based on the jump table label.
   1924   if (Subtarget.isPICStyleRIPRel())
   1925     return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
   1926 
   1927   // Otherwise, the reference is relative to the PIC base.
   1928   return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
   1929 }
   1930 
   1931 std::pair<const TargetRegisterClass *, uint8_t>
   1932 X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
   1933                                            MVT VT) const {
   1934   const TargetRegisterClass *RRC = nullptr;
   1935   uint8_t Cost = 1;
   1936   switch (VT.SimpleTy) {
   1937   default:
   1938     return TargetLowering::findRepresentativeClass(TRI, VT);
   1939   case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
   1940     RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
   1941     break;
   1942   case MVT::x86mmx:
   1943     RRC = &X86::VR64RegClass;
   1944     break;
   1945   case MVT::f32: case MVT::f64:
   1946   case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
   1947   case MVT::v4f32: case MVT::v2f64:
   1948   case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
   1949   case MVT::v4f64:
   1950     RRC = &X86::VR128RegClass;
   1951     break;
   1952   }
   1953   return std::make_pair(RRC, Cost);
   1954 }
   1955 
   1956 unsigned X86TargetLowering::getAddressSpace() const {
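           // In the X86 backend's convention, address space 256 is %gs-relative
           // and 257 is %fs-relative.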
   1957   if (Subtarget.is64Bit())
   1958     return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
   1959   return 256;
   1960 }
   1961 
   1962 Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
   1963   // glibc has a special slot for the stack guard in tcbhead_t, use it instead
   1964   // of the usual global variable (see sysdeps/{i386,x86_64}/nptl/tls.h)
   1965   if (!Subtarget.isTargetGlibc())
   1966     return TargetLowering::getIRStackGuard(IRB);
   1967 
    1968   // The guard slot is %fs:0x28 on x86-64 (%gs:0x28 with the Kernel code
    1969   // model) and %gs:0x14 on i386.
   1970   unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
   1971   unsigned AddressSpace = getAddressSpace();
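           // For a typical 64-bit glibc target this is roughly the constant
           //   inttoptr (i32 40 to i8* addrspace(257)*)
           // i.e. the %fs:0x28 slot.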
   1972   return ConstantExpr::getIntToPtr(
   1973       ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
   1974       Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
   1975 }
   1976 
   1977 void X86TargetLowering::insertSSPDeclarations(Module &M) const {
    1978   // The MSVC CRT provides functionality for stack protection.
   1979   if (Subtarget.getTargetTriple().isOSMSVCRT()) {
    1980     // The MSVC CRT has a global variable holding the security cookie.
   1981     M.getOrInsertGlobal("__security_cookie",
   1982                         Type::getInt8PtrTy(M.getContext()));
   1983 
    1984     // The MSVC CRT has a function to validate the security cookie.
   1985     auto *SecurityCheckCookie = cast<Function>(
   1986         M.getOrInsertFunction("__security_check_cookie",
   1987                               Type::getVoidTy(M.getContext()),
   1988                               Type::getInt8PtrTy(M.getContext()), nullptr));
   1989     SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
   1990     SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
   1991     return;
   1992   }
   1993   // glibc has a special slot for the stack guard.
   1994   if (Subtarget.isTargetGlibc())
   1995     return;
   1996   TargetLowering::insertSSPDeclarations(M);
   1997 }
   1998 
   1999 Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
    2000   // The MSVC CRT has a global variable holding the security cookie.
   2001   if (Subtarget.getTargetTriple().isOSMSVCRT())
   2002     return M.getGlobalVariable("__security_cookie");
   2003   return TargetLowering::getSDagStackGuard(M);
   2004 }
   2005 
   2006 Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
    2007   // The MSVC CRT has a function to validate the security cookie.
   2008   if (Subtarget.getTargetTriple().isOSMSVCRT())
   2009     return M.getFunction("__security_check_cookie");
   2010   return TargetLowering::getSSPStackGuardCheck(M);
   2011 }
   2012 
   2013 Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
   2014   if (!Subtarget.isTargetAndroid())
   2015     return TargetLowering::getSafeStackPointerLocation(IRB);
   2016 
   2017   // Android provides a fixed TLS slot for the SafeStack pointer. See the
   2018   // definition of TLS_SLOT_SAFESTACK in
   2019   // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
   2020   unsigned AddressSpace, Offset;
   2021 
    2022   // The SafeStack slot is %fs:0x48 on x86-64 (%gs:0x48 with the Kernel code
    2023   // model) and %gs:0x24 on i386.
   2024   Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
   2025   AddressSpace = getAddressSpace();
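           // For a typical 64-bit Android target this is roughly the constant
           //   inttoptr (i32 72 to i8* addrspace(257)*)
           // i.e. the %fs:0x48 TLS slot.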
   2026   return ConstantExpr::getIntToPtr(
   2027       ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
   2028       Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
   2029 }
   2030 
   2031 bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
   2032                                             unsigned DestAS) const {
   2033   assert(SrcAS != DestAS && "Expected different address spaces!");
   2034 
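           // Casts that stay within the ordinary (< 256) address spaces are no-ops;
           // anything involving the segment-relative spaces (e.g. 256 = %gs,
           // 257 = %fs) changes the addressing and is not.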
   2035   return SrcAS < 256 && DestAS < 256;
   2036 }
   2037 
   2038 //===----------------------------------------------------------------------===//
   2039 //               Return Value Calling Convention Implementation
   2040 //===----------------------------------------------------------------------===//
   2041 
   2042 #include "X86GenCallingConv.inc"
   2043 
   2044 bool X86TargetLowering::CanLowerReturn(
   2045     CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
   2046     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
   2047   SmallVector<CCValAssign, 16> RVLocs;
   2048   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
   2049   return CCInfo.CheckReturn(Outs, RetCC_X86);
   2050 }
   2051 
   2052 const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
   2053   static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
   2054   return ScratchRegs;
   2055 }
   2056 
   2057 SDValue
   2058 X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   2059                                bool isVarArg,
   2060                                const SmallVectorImpl<ISD::OutputArg> &Outs,
   2061                                const SmallVectorImpl<SDValue> &OutVals,
   2062                                const SDLoc &dl, SelectionDAG &DAG) const {
   2063   MachineFunction &MF = DAG.getMachineFunction();
   2064   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
   2065 
   2066   if (CallConv == CallingConv::X86_INTR && !Outs.empty())
   2067     report_fatal_error("X86 interrupts may not return any value");
   2068 
   2069   SmallVector<CCValAssign, 16> RVLocs;
   2070   CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
   2071   CCInfo.AnalyzeReturn(Outs, RetCC_X86);
   2072 
   2073   SDValue Flag;
   2074   SmallVector<SDValue, 6> RetOps;
   2075   RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
   2076   // Operand #1 = Bytes To Pop
   2077   RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
   2078                    MVT::i32));
   2079 
   2080   // Copy the result values into the output registers.
   2081   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
   2082     CCValAssign &VA = RVLocs[i];
   2083     assert(VA.isRegLoc() && "Can only return in registers!");
   2084     SDValue ValToCopy = OutVals[i];
   2085     EVT ValVT = ValToCopy.getValueType();
   2086 
   2087     // Promote values to the appropriate types.
   2088     if (VA.getLocInfo() == CCValAssign::SExt)
   2089       ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
   2090     else if (VA.getLocInfo() == CCValAssign::ZExt)
   2091       ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
   2092     else if (VA.getLocInfo() == CCValAssign::AExt) {
   2093       if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
   2094         ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
   2095       else
   2096         ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
   2097     }
   2098     else if (VA.getLocInfo() == CCValAssign::BCvt)
   2099       ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
   2100 
   2101     assert(VA.getLocInfo() != CCValAssign::FPExt &&
   2102            "Unexpected FP-extend for return value.");
   2103 
    2104     // If this is x86-64 and we have disabled SSE, we can't return FP values
    2105     // or SSE/MMX vectors.
   2106     if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
   2107          VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
   2108           (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
   2109       report_fatal_error("SSE register return with SSE disabled");
   2110     }
   2111     // Likewise we can't return F64 values with SSE1 only.  gcc does so, but
   2112     // llvm-gcc has never done it right and no one has noticed, so this
   2113     // should be OK for now.
   2114     if (ValVT == MVT::f64 &&
   2115         (Subtarget.is64Bit() && !Subtarget.hasSSE2()))
   2116       report_fatal_error("SSE2 register return with SSE2 disabled");
   2117 
   2118     // Returns in ST0/ST1 are handled specially: these are pushed as operands to
   2119     // the RET instruction and handled by the FP Stackifier.
   2120     if (VA.getLocReg() == X86::FP0 ||
   2121         VA.getLocReg() == X86::FP1) {
   2122       // If this is a copy from an xmm register to ST(0), use an FPExtend to
   2123       // change the value to the FP stack register class.
   2124       if (isScalarFPTypeInSSEReg(VA.getValVT()))
   2125         ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
   2126       RetOps.push_back(ValToCopy);
   2127       // Don't emit a copytoreg.
   2128       continue;
   2129     }
   2130 
   2131     // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
   2132     // which is returned in RAX / RDX.
   2133     if (Subtarget.is64Bit()) {
   2134       if (ValVT == MVT::x86mmx) {
   2135         if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
   2136           ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
   2137           ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
   2138                                   ValToCopy);
   2139           // If we don't have SSE2 available, convert to v4f32 so the generated
   2140           // register is legal.
   2141           if (!Subtarget.hasSSE2())
   2142             ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
   2143         }
   2144       }
   2145     }
   2146 
   2147     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
   2148     Flag = Chain.getValue(1);
   2149     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   2150   }
   2151 
    2152   // The Swift calling convention does not require that we copy the sret
    2153   // argument into %rax/%eax for the return; SRetReturnReg is not set for Swift.
   2154 
   2155   // All x86 ABIs require that for returning structs by value we copy
   2156   // the sret argument into %rax/%eax (depending on ABI) for the return.
   2157   // We saved the argument into a virtual register in the entry block,
   2158   // so now we copy the value out and into %rax/%eax.
   2159   //
   2160   // Checking Function.hasStructRetAttr() here is insufficient because the IR
   2161   // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
   2162   // false, then an sret argument may be implicitly inserted in the SelDAG. In
   2163   // either case FuncInfo->setSRetReturnReg() will have been called.
   2164   if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
   2165     // When we have both sret and another return value, we should use the
   2166     // original Chain stored in RetOps[0], instead of the current Chain updated
    2167     // in the above loop. If we only have sret, RetOps[0] equals Chain.
   2168 
   2169     // For the case of sret and another return value, we have
   2170     //   Chain_0 at the function entry
   2171     //   Chain_1 = getCopyToReg(Chain_0) in the above loop
   2172     // If we use Chain_1 in getCopyFromReg, we will have
   2173     //   Val = getCopyFromReg(Chain_1)
   2174     //   Chain_2 = getCopyToReg(Chain_1, Val) from below
   2175 
   2176     // getCopyToReg(Chain_0) will be glued together with
   2177     // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
   2178     // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
   2179     //   Data dependency from Unit B to Unit A due to usage of Val in
   2180     //     getCopyToReg(Chain_1, Val)
   2181     //   Chain dependency from Unit A to Unit B
   2182 
    2183     // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
   2184     SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
   2185                                      getPointerTy(MF.getDataLayout()));
   2186 
   2187     unsigned RetValReg
   2188         = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
   2189           X86::RAX : X86::EAX;
   2190     Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
   2191     Flag = Chain.getValue(1);
   2192 
   2193     // RAX/EAX now acts like a return value.
   2194     RetOps.push_back(
   2195         DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
   2196   }
   2197 
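           // Some calling conventions (e.g. CXX_FAST_TLS) preserve additional
           // callee-saved registers by copying them through virtual registers;
           // list those registers as extra return operands so they are kept live
           // across the return.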
   2198   const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
   2199   const MCPhysReg *I =
   2200       TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
   2201   if (I) {
   2202     for (; *I; ++I) {
   2203       if (X86::GR64RegClass.contains(*I))
   2204         RetOps.push_back(DAG.getRegister(*I, MVT::i64));
   2205       else
   2206         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
   2207     }
   2208   }
   2209 
   2210   RetOps[0] = Chain;  // Update chain.
   2211 
   2212   // Add the flag if we have it.
   2213   if (Flag.getNode())
   2214     RetOps.push_back(Flag);
   2215 
   2216   X86ISD::NodeType opcode = X86ISD::RET_FLAG;
   2217   if (CallConv == CallingConv::X86_INTR)
   2218     opcode = X86ISD::IRET;
   2219   return DAG.getNode(opcode, dl, MVT::Other, RetOps);
   2220 }
   2221 
   2222 bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
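           // The value must be the node's only result and have exactly one use;
           // otherwise it cannot feed the return directly.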
   2223   if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
   2224     return false;
   2225 
   2226   SDValue TCChain = Chain;
   2227   SDNode *Copy = *N->use_begin();
   2228   if (Copy->getOpcode() == ISD::CopyToReg) {
   2229     // If the copy has a glue operand, we conservatively assume it isn't safe to
   2230     // perform a tail call.
   2231     if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
   2232       return false;
   2233     TCChain = Copy->getOperand(0);
   2234   } else if (Copy->getOpcode() != ISD::FP_EXTEND)
   2235     return false;
   2236 
   2237   bool HasRet = false;
   2238   for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
   2239        UI != UE; ++UI) {
   2240     if (UI->getOpcode() != X86ISD::RET_FLAG)
   2241       return false;
    2242     // If we are returning more than one value, we can definitely
    2243     // not make a tail call; see PR19530.
   2244     if (UI->getNumOperands() > 4)
   2245       return false;
   2246     if (UI->getNumOperands() == 4 &&
   2247         UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
   2248       return false;
   2249     HasRet = true;
   2250   }
   2251 
   2252   if (!HasRet)
   2253     return false;
   2254 
   2255   Chain = TCChain;
   2256   return true;
   2257 }
   2258 
   2259 EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
   2260                                            ISD::NodeType ExtendKind) const {
   2261   MVT ReturnMVT = MVT::i32;
   2262 
   2263   bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
   2264   if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
   2265     // The ABI does not require i1, i8 or i16 to be extended.
   2266     //
   2267     // On Darwin, there is code in the wild relying on Clang's old behaviour of
   2268     // always extending i8/i16 return values, so keep doing that for now.
   2269     // (PR26665).
   2270     ReturnMVT = MVT::i8;
   2271   }
   2272 
   2273   EVT MinVT = getRegisterType(Context, ReturnMVT);
   2274   return VT.bitsLT(MinVT) ? MinVT : VT;
   2275 }
   2276 
    2277 /// Lower the result values of a call into the appropriate copies out of
    2278 /// the physical registers in which they were returned.
   2279 ///
   2280 SDValue X86TargetLowering::LowerCallResult(
   2281     SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
   2282     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
   2283     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   2284 
   2285   // Assign locations to each value returned by this call.
   2286   SmallVector<CCValAssign, 16> RVLocs;
   2287   bool Is64Bit = Subtarget.is64Bit();
   2288   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
   2289                  *DAG.getContext());
   2290   CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
   2291 
   2292   // Copy all of the result registers out of their specified physreg.
   2293   for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
   2294     CCValAssign &VA = RVLocs[i];
   2295     EVT CopyVT = VA.getLocVT();
   2296 
    2297     // If this is x86-64 and SSE is disabled, we can't return FP values
   2298     if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
   2299         ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) {
   2300       report_fatal_error("SSE register return with SSE disabled");
   2301     }
   2302 
   2303     // If we prefer to use the value in xmm registers, copy it out as f80 and
   2304     // use a truncate to move it from fp stack reg to xmm reg.
   2305     bool RoundAfterCopy = false;
   2306     if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
   2307         isScalarFPTypeInSSEReg(VA.getValVT())) {
   2308       if (!Subtarget.hasX87())
   2309         report_fatal_error("X87 register return with X87 disabled");
   2310       CopyVT = MVT::f80;
   2311       RoundAfterCopy = (CopyVT != VA.getLocVT());
   2312     }
   2313 
   2314     Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
   2315                                CopyVT, InFlag).getValue(1);
   2316     SDValue Val = Chain.getValue(0);
   2317 
   2318     if (RoundAfterCopy)
   2319       Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
   2320                         // This truncation won't change the value.
   2321                         DAG.getIntPtrConstant(1, dl));
   2322 
   2323     if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
   2324       Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
   2325 
   2326     InFlag = Chain.getValue(2);
   2327     InVals.push_back(Val);
   2328   }
   2329 
   2330   return Chain;
   2331 }
   2332 
   2333 //===----------------------------------------------------------------------===//
   2334 //                C & StdCall & Fast Calling Convention implementation
   2335 //===----------------------------------------------------------------------===//
    2336 //  The StdCall calling convention is used by many Windows API routines. It
    2337 //  differs from the C calling convention only slightly: the callee cleans up
    2338 //  the stack instead of the caller, and symbols are decorated (e.g. _foo@12).
    2339 //  It doesn't support any vector arguments.
    2340 //  For info on the fast calling convention, see the Fast Calling Convention
    2341 //  (tail call) implementation in LowerX86_32FastCCCallTo.
   2342 
    2343 /// The possible ways a call or function can use struct return (sret)
    2344 /// semantics.
   2345 enum StructReturnType {
   2346   NotStructReturn,
   2347   RegStructReturn,
   2348   StackStructReturn
   2349 };
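         /// Determine whether the outgoing arguments of a call use struct return
         /// semantics and, if so, whether the sret pointer is passed in a register
         /// or on the stack.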
   2350 static StructReturnType
   2351 callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
   2352   if (Outs.empty())
   2353     return NotStructReturn;
   2354 
   2355   const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
   2356   if (!Flags.isSRet())
   2357     return NotStructReturn;
   2358   if (Flags.isInReg() || IsMCU)
   2359     return RegStructReturn;
   2360   return StackStructReturn;
   2361 }
   2362 
   2363 /// Determines whether a function uses struct return semantics.
   2364 static StructReturnType
   2365 argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
   2366   if (Ins.empty())
   2367     return NotStructReturn;
   2368 
   2369   const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
   2370   if (!Flags.isSRet())
   2371     return NotStructReturn;
   2372   if (Flags.isInReg() || IsMCU)
   2373     return RegStructReturn;
   2374   return StackStructReturn;
   2375 }
   2376 
    2377 /// Make a copy of an aggregate at the address specified by "Src" to the
    2378 /// address "Dst", with size and alignment information specified by the byval
    2379 /// parameter attribute. The copy will be passed as a byval function parameter.
   2380 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
   2381                                          SDValue Chain, ISD::ArgFlagsTy Flags,
   2382                                          SelectionDAG &DAG, const SDLoc &dl) {
   2383   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
   2384 
   2385   return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
   2386                        /*isVolatile*/false, /*AlwaysInline=*/true,
   2387                        /*isTailCall*/false,
   2388                        MachinePointerInfo(), MachinePointerInfo());
   2389 }
   2390 
   2391 /// Return true if the calling convention is one that we can guarantee TCO for.
   2392 static bool canGuaranteeTCO(CallingConv::ID CC) {
   2393   return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
   2394           CC == CallingConv::HiPE || CC == CallingConv::HHVM);
   2395 }
   2396 
   2397 /// Return true if we might ever do TCO for calls with this calling convention.
   2398 static bool mayTailCallThisCC(CallingConv::ID CC) {
   2399   switch (CC) {
   2400   // C calling conventions:
   2401   case CallingConv::C:
   2402   case CallingConv::X86_64_Win64:
   2403   case CallingConv::X86_64_SysV:
   2404   // Callee pop conventions:
   2405   case CallingConv::X86_ThisCall:
   2406   case CallingConv::X86_StdCall:
   2407   case CallingConv::X86_VectorCall:
   2408   case CallingConv::X86_FastCall:
   2409     return true;
   2410   default:
   2411     return canGuaranteeTCO(CC);
   2412   }
   2413 }
   2414 
   2415 /// Return true if the function is being made into a tailcall target by
   2416 /// changing its ABI.
   2417 static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
   2418   return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
   2419 }
   2420 
   2421 bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
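           // Respect the "disable-tail-calls" function attribute and calls that are
           // not marked as tail calls in the IR.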
   2422   auto Attr =
   2423       CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
   2424   if (!CI->isTailCall() || Attr.getValueAsString() == "true")
   2425     return false;
   2426 
   2427   CallSite CS(CI);
   2428   CallingConv::ID CalleeCC = CS.getCallingConv();
   2429   if (!mayTailCallThisCC(CalleeCC))
   2430     return false;
   2431 
   2432   return true;
   2433 }
   2434 
   2435 SDValue
   2436 X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
   2437                                     const SmallVectorImpl<ISD::InputArg> &Ins,
   2438                                     const SDLoc &dl, SelectionDAG &DAG,
   2439                                     const CCValAssign &VA,
   2440                                     MachineFrameInfo *MFI, unsigned i) const {
   2441   // Create the nodes corresponding to a load from this parameter slot.
   2442   ISD::ArgFlagsTy Flags = Ins[i].Flags;
   2443   bool AlwaysUseMutable = shouldGuaranteeTCO(
   2444       CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
   2445   bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
   2446   EVT ValVT;
   2447 
    2448   // If the value is passed by pointer, we have its address instead of the
    2449   // value itself.
   2450   bool ExtendedInMem = VA.isExtInLoc() &&
   2451     VA.getValVT().getScalarType() == MVT::i1;
   2452 
   2453   if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
   2454     ValVT = VA.getLocVT();
   2455   else
   2456     ValVT = VA.getValVT();
   2457 
    2458   // Calculate the SP offset of an interrupt parameter, re-arranging the slot
    2459   // normally taken by the return address.
   2460   int Offset = 0;
   2461   if (CallConv == CallingConv::X86_INTR) {
   2462     const X86Subtarget& Subtarget =
   2463         static_cast<const X86Subtarget&>(DAG.getSubtarget());
    2464     // X86 interrupts may take one or two arguments.
    2465     // Unlike a regular call, there is no return address on the stack.
    2466     // The offset of the last argument needs to be set to -4/-8 bytes;
    2467     // when there are two arguments, the offset of the first one is 0 bytes.
   2468     Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
   2469   }
   2470 
   2471   // FIXME: For now, all byval parameter objects are marked mutable. This can be
   2472   // changed with more analysis.
    2473   // In case of tail call optimization, mark all arguments mutable, since they
    2474   // could be overwritten by the lowering of the arguments of a tail call.
   2475   if (Flags.isByVal()) {
   2476     unsigned Bytes = Flags.getByValSize();
   2477     if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
   2478     int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
   2479     // Adjust SP offset of interrupt parameter.
   2480     if (CallConv == CallingConv::X86_INTR) {
   2481       MFI->setObjectOffset(FI, Offset);
   2482     }
   2483     return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
   2484   } else {
   2485     int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
   2486                                     VA.getLocMemOffset(), isImmutable);
   2487 
   2488     // Set SExt or ZExt flag.
   2489     if (VA.getLocInfo() == CCValAssign::ZExt) {
   2490       MFI->setObjectZExt(FI, true);
   2491     } else if (VA.getLocInfo() == CCValAssign::SExt) {
   2492       MFI->setObjectSExt(FI, true);
   2493     }
   2494 
   2495     // Adjust SP offset of interrupt parameter.
   2496     if (CallConv == CallingConv::X86_INTR) {
   2497       MFI->setObjectOffset(FI, Offset);
   2498     }
   2499 
   2500     SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
   2501     SDValue Val = DAG.getLoad(
   2502         ValVT, dl, Chain, FIN,
   2503         MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), false,
   2504         false, false, 0);
   2505     return ExtendedInMem ?
   2506       DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val) : Val;
   2507   }
   2508 }
   2509 
   2510 // FIXME: Get this from tablegen.
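         /// Return the GPRs used to pass integer arguments in 64-bit mode for the
         /// given calling convention, in argument order.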
   2511 static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
   2512                                                 const X86Subtarget &Subtarget) {
   2513   assert(Subtarget.is64Bit());
   2514 
   2515   if (Subtarget.isCallingConvWin64(CallConv)) {
   2516     static const MCPhysReg GPR64ArgRegsWin64[] = {
   2517       X86::RCX, X86::RDX, X86::R8,  X86::R9
   2518     };
   2519     return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
   2520   }
   2521 
   2522   static const MCPhysReg GPR64ArgRegs64Bit[] = {
   2523     X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
   2524   };
   2525   return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
   2526 }
   2527 
   2528 // FIXME: Get this from tablegen.
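         /// Return the XMM registers that may carry floating-point or vector varargs
         /// in 64-bit mode, or an empty list if they need not be saved.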
   2529 static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
   2530                                                 CallingConv::ID CallConv,
   2531                                                 const X86Subtarget &Subtarget) {
   2532   assert(Subtarget.is64Bit());
   2533   if (Subtarget.isCallingConvWin64(CallConv)) {
    2534     // The XMM registers which might contain vararg parameters are shadowed
    2535     // by their paired GPRs, so we only need to save the GPRs to their home
    2536     // slots.
   2537     // TODO: __vectorcall will change this.
   2538     return None;
   2539   }
   2540 
   2541   const Function *Fn = MF.getFunction();
   2542   bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
   2543   bool isSoftFloat = Subtarget.useSoftFloat();
   2544   assert(!(isSoftFloat && NoImplicitFloatOps) &&
   2545          "SSE register cannot be used when SSE is disabled!");
   2546   if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
   2547     // Kernel mode asks for SSE to be disabled, so there are no XMM argument
   2548     // registers.
   2549     return None;
   2550 
   2551   static const MCPhysReg XMMArgRegs64Bit[] = {
   2552     X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
   2553     X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
   2554   };
   2555   return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
   2556 }
   2557 
   2558 SDValue X86TargetLowering::LowerFormalArguments(
   2559     SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
   2560     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
   2561     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   2562   MachineFunction &MF = DAG.getMachineFunction();
   2563   X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
   2564   const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
   2565 
   2566   const Function *Fn = MF.getFunction();
   2567   if (Fn->hasExternalLinkage() &&
   2568       Subtarget.isTargetCygMing() &&
   2569       Fn->getName() == "main")
   2570     FuncInfo->setForceFramePointer(true);
   2571 
   2572   MachineFrameInfo *MFI = MF.getFrameInfo();
   2573   bool Is64Bit = Subtarget.is64Bit();
   2574   bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
   2575 
   2576   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
   2577          "Var args not supported with calling convention fastcc, ghc or hipe");
   2578 
   2579   if (CallConv == CallingConv::X86_INTR) {
   2580     bool isLegal = Ins.size() == 1 ||
   2581                    (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
   2582                                         (!Is64Bit && Ins[1].VT == MVT::i32)));
   2583     if (!isLegal)
   2584       report_fatal_error("X86 interrupts may take one or two arguments");
   2585   }
   2586 
   2587   // Assign locations to all of the incoming arguments.
   2588   SmallVector<CCValAssign, 16> ArgLocs;
   2589   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
   2590 
    2591   // Allocate the 32-byte shadow (home) area required by the Win64 ABI.
   2592   if (IsWin64)
   2593     CCInfo.AllocateStack(32, 8);
   2594 
   2595   CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
   2596 
   2597   unsigned LastVal = ~0U;
   2598   SDValue ArgValue;
   2599   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   2600     CCValAssign &VA = ArgLocs[i];
   2601     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
   2602     // places.
   2603     assert(VA.getValNo() != LastVal &&
   2604            "Don't support value assigned to multiple locs yet");
   2605     (void)LastVal;
   2606     LastVal = VA.getValNo();
   2607 
   2608     if (VA.isRegLoc()) {
   2609       EVT RegVT = VA.getLocVT();
   2610       const TargetRegisterClass *RC;
   2611       if (RegVT == MVT::i32)
   2612         RC = &X86::GR32RegClass;
   2613       else if (Is64Bit && RegVT == MVT::i64)
   2614         RC = &X86::GR64RegClass;
   2615       else if (RegVT == MVT::f32)
   2616         RC = &X86::FR32RegClass;
   2617       else if (RegVT == MVT::f64)
   2618         RC = &X86::FR64RegClass;
   2619       else if (RegVT == MVT::f128)
   2620         RC = &X86::FR128RegClass;
   2621       else if (RegVT.is512BitVector())
   2622         RC = &X86::VR512RegClass;
   2623       else if (RegVT.is256BitVector())
   2624         RC = &X86::VR256RegClass;
   2625       else if (RegVT.is128BitVector())
   2626         RC = &X86::VR128RegClass;
   2627       else if (RegVT == MVT::x86mmx)
   2628         RC = &X86::VR64RegClass;
   2629       else if (RegVT == MVT::i1)
   2630         RC = &X86::VK1RegClass;
   2631       else if (RegVT == MVT::v8i1)
   2632         RC = &X86::VK8RegClass;
   2633       else if (RegVT == MVT::v16i1)
   2634         RC = &X86::VK16RegClass;
   2635       else if (RegVT == MVT::v32i1)
   2636         RC = &X86::VK32RegClass;
   2637       else if (RegVT == MVT::v64i1)
   2638         RC = &X86::VK64RegClass;
   2639       else
   2640         llvm_unreachable("Unknown argument type!");
   2641 
   2642       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
   2643       ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
   2644 
   2645       // If this is an 8 or 16-bit value, it is really passed promoted to 32
   2646       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
   2647       // right size.
   2648       if (VA.getLocInfo() == CCValAssign::SExt)
   2649         ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
   2650                                DAG.getValueType(VA.getValVT()));
   2651       else if (VA.getLocInfo() == CCValAssign::ZExt)
   2652         ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
   2653                                DAG.getValueType(VA.getValVT()));
   2654       else if (VA.getLocInfo() == CCValAssign::BCvt)
   2655         ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
   2656 
   2657       if (VA.isExtInLoc()) {
   2658         // Handle MMX values passed in XMM regs.
   2659         if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
   2660           ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
   2661         else
   2662           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
   2663       }
   2664     } else {
   2665       assert(VA.isMemLoc());
   2666       ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
   2667     }
   2668 
    2669     // If the value is passed via a pointer, do a load.
   2670     if (VA.getLocInfo() == CCValAssign::Indirect)
   2671       ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue,
   2672                              MachinePointerInfo(), false, false, false, 0);
   2673 
   2674     InVals.push_back(ArgValue);
   2675   }
   2676 
   2677   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    2678     // The Swift calling convention does not require that we copy the sret
    2679     // argument into %rax/%eax for the return; we don't set SRetReturnReg for Swift.
   2680     if (CallConv == CallingConv::Swift)
   2681       continue;
   2682 
   2683     // All x86 ABIs require that for returning structs by value we copy the
   2684     // sret argument into %rax/%eax (depending on ABI) for the return. Save
   2685     // the argument into a virtual register so that we can access it from the
   2686     // return points.
   2687     if (Ins[i].Flags.isSRet()) {
   2688       unsigned Reg = FuncInfo->getSRetReturnReg();
   2689       if (!Reg) {
   2690         MVT PtrTy = getPointerTy(DAG.getDataLayout());
   2691         Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
   2692         FuncInfo->setSRetReturnReg(Reg);
   2693       }
   2694       SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
   2695       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
   2696       break;
   2697     }
   2698   }
   2699 
   2700   unsigned StackSize = CCInfo.getNextStackOffset();
   2701   // Align stack specially for tail calls.
   2702   if (shouldGuaranteeTCO(CallConv,
   2703                          MF.getTarget().Options.GuaranteedTailCallOpt))
   2704     StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
   2705 
    2706   // If the function takes a variable number of arguments, make a frame index
    2707   // for the start of the first vararg value, for use when expanding
    2708   // llvm.va_start. We can skip this if there are no va_start calls.
   2709   if (MFI->hasVAStart() &&
   2710       (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
   2711                    CallConv != CallingConv::X86_ThisCall))) {
   2712     FuncInfo->setVarArgsFrameIndex(
   2713         MFI->CreateFixedObject(1, StackSize, true));
   2714   }
   2715 
   2716   // Figure out if XMM registers are in use.
   2717   assert(!(Subtarget.useSoftFloat() &&
   2718            Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&
   2719          "SSE register cannot be used when SSE is disabled!");
   2720 
   2721   // 64-bit calling conventions support varargs and register parameters, so we
   2722   // have to do extra work to spill them in the prologue.
   2723   if (Is64Bit && isVarArg && MFI->hasVAStart()) {
    2724     // Find the index of the first unallocated GPR and XMM argument register.
   2725     ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
   2726     ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
   2727     unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
   2728     unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
   2729     assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
   2730            "SSE register cannot be used when SSE is disabled!");
   2731 
   2732     // Gather all the live in physical registers.
   2733     SmallVector<SDValue, 6> LiveGPRs;
   2734     SmallVector<SDValue, 8> LiveXMMRegs;
   2735     SDValue ALVal;
   2736     for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
   2737       unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
   2738       LiveGPRs.push_back(
   2739           DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
   2740     }
   2741     if (!ArgXMMs.empty()) {
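               // In the SysV x86-64 ABI, %al holds an upper bound on the number of
               // vector registers used to pass arguments to a varargs function.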
   2742       unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
   2743       ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
   2744       for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
   2745         unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
   2746         LiveXMMRegs.push_back(
   2747             DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
   2748       }
   2749     }
   2750 
   2751     if (IsWin64) {
   2752       // Get to the caller-allocated home save location.  Add 8 to account
   2753       // for the return address.
   2754       int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
   2755       FuncInfo->setRegSaveFrameIndex(
   2756           MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
    2757       // Fix up the vararg frame index to point into the shadow area (4 x i64).
   2758       if (NumIntRegs < 4)
   2759         FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
   2760     } else {
   2761       // For X86-64, if there are vararg parameters that are passed via
   2762       // registers, then we must store them to their spots on the stack so
   2763       // they may be loaded by dereferencing the result of va_next.
   2764       FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
   2765       FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
   2766       FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
   2767           ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
   2768     }
   2769 
   2770     // Store the integer parameter registers.
   2771     SmallVector<SDValue, 8> MemOps;
   2772     SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
   2773                                       getPointerTy(DAG.getDataLayout()));
   2774     unsigned Offset = FuncInfo->getVarArgsGPOffset();
   2775     for (SDValue Val : LiveGPRs) {
   2776       SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
   2777                                 RSFIN, DAG.getIntPtrConstant(Offset, dl));
   2778       SDValue Store =
   2779           DAG.getStore(Val.getValue(1), dl, Val, FIN,
   2780                        MachinePointerInfo::getFixedStack(
   2781                            DAG.getMachineFunction(),
   2782                            FuncInfo->getRegSaveFrameIndex(), Offset),
   2783                        false, false, 0);
   2784       MemOps.push_back(Store);
   2785       Offset += 8;
   2786     }
   2787 
   2788     if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
   2789       // Now store the XMM (fp + vector) parameter registers.
   2790       SmallVector<SDValue, 12> SaveXMMOps;
   2791       SaveXMMOps.push_back(Chain);
   2792       SaveXMMOps.push_back(ALVal);
   2793       SaveXMMOps.push_back(DAG.getIntPtrConstant(
   2794                              FuncInfo->getRegSaveFrameIndex(), dl));
   2795       SaveXMMOps.push_back(DAG.getIntPtrConstant(
   2796                              FuncInfo->getVarArgsFPOffset(), dl));
   2797       SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
   2798                         LiveXMMRegs.end());
   2799       MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
   2800                                    MVT::Other, SaveXMMOps));
   2801     }
   2802 
   2803     if (!MemOps.empty())
   2804       Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
   2805   }
   2806 
   2807   if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
   2808     // Find the largest legal vector type.
   2809     MVT VecVT = MVT::Other;
   2810     // FIXME: Only some x86_32 calling conventions support AVX512.
   2811     if (Subtarget.hasAVX512() &&
   2812         (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
   2813                      CallConv == CallingConv::Intel_OCL_BI)))
   2814       VecVT = MVT::v16f32;
   2815     else if (Subtarget.hasAVX())
   2816       VecVT = MVT::v8f32;
   2817     else if (Subtarget.hasSSE2())
   2818       VecVT = MVT::v4f32;
   2819 
   2820     // We forward some GPRs and some vector types.
   2821     SmallVector<MVT, 2> RegParmTypes;
   2822     MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
   2823     RegParmTypes.push_back(IntVT);
   2824     if (VecVT != MVT::Other)
   2825       RegParmTypes.push_back(VecVT);
   2826 
   2827     // Compute the set of forwarded registers. The rest are scratch.
   2828     SmallVectorImpl<ForwardedRegister> &Forwards =
   2829         FuncInfo->getForwardedMustTailRegParms();
   2830     CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
   2831 
   2832     // Conservatively forward AL on x86_64, since it might be used for varargs.
   2833     if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
   2834       unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
   2835       Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
   2836     }
   2837 
   2838     // Copy all forwards from physical to virtual registers.
   2839     for (ForwardedRegister &F : Forwards) {
   2840       // FIXME: Can we use a less constrained schedule?
   2841       SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
   2842       F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
   2843       Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
   2844     }
   2845   }
   2846 
   2847   // Some CCs need callee pop.
   2848   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
   2849                        MF.getTarget().Options.GuaranteedTailCallOpt)) {
   2850     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
   2851   } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
   2852     // X86 interrupts must pop the error code if present
   2853     FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
   2854   } else {
   2855     FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
   2856     // If this is an sret function, the return should pop the hidden pointer.
   2857     if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
   2858         !Subtarget.getTargetTriple().isOSMSVCRT() &&
   2859         argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
   2860       FuncInfo->setBytesToPopOnReturn(4);
   2861   }
   2862 
   2863   if (!Is64Bit) {
   2864     // RegSaveFrameIndex is X86-64 only.
   2865     FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
   2866     if (CallConv == CallingConv::X86_FastCall ||
   2867         CallConv == CallingConv::X86_ThisCall)
   2868       // fastcc functions can't have varargs.
   2869       FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
   2870   }
   2871 
   2872   FuncInfo->setArgumentStackSize(StackSize);
   2873 
   2874   if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
   2875     EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
   2876     if (Personality == EHPersonality::CoreCLR) {
   2877       assert(Is64Bit);
   2878       // TODO: Add a mechanism to frame lowering that will allow us to indicate
   2879       // that we'd prefer this slot be allocated towards the bottom of the frame
   2880       // (i.e. near the stack pointer after allocating the frame).  Every
   2881       // funclet needs a copy of this slot in its (mostly empty) frame, and the
   2882       // offset from the bottom of this and each funclet's frame must be the
   2883       // same, so the size of funclets' (mostly empty) frames is dictated by
   2884       // how far this slot is from the bottom (since they allocate just enough
   2885       // space to accommodate holding this slot at the correct offset).
   2886       int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
   2887       EHInfo->PSPSymFrameIdx = PSPSymFI;
   2888     }
   2889   }
   2890 
   2891   return Chain;
   2892 }
   2893 
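         /// Store an outgoing call argument to its stack slot, or emit a byval copy,
         /// at the location assigned by the calling convention.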
   2894 SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
   2895                                             SDValue Arg, const SDLoc &dl,
   2896                                             SelectionDAG &DAG,
   2897                                             const CCValAssign &VA,
   2898                                             ISD::ArgFlagsTy Flags) const {
   2899   unsigned LocMemOffset = VA.getLocMemOffset();
   2900   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
   2901   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
   2902                        StackPtr, PtrOff);
   2903   if (Flags.isByVal())
   2904     return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
   2905 
   2906   return DAG.getStore(
   2907       Chain, dl, Arg, PtrOff,
   2908       MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
   2909       false, false, 0);
   2910 }
   2911 
    2912 /// Emit a load of the return address if tail call
    2913 /// optimization is performed and it is required.
   2914 SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
   2915     SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
   2916     bool Is64Bit, int FPDiff, const SDLoc &dl) const {
   2917   // Adjust the Return address stack slot.
   2918   EVT VT = getPointerTy(DAG.getDataLayout());
   2919   OutRetAddr = getReturnAddressFrameIndex(DAG);
   2920 
   2921   // Load the "old" Return address.
   2922   OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(),
   2923                            false, false, false, 0);
   2924   return SDValue(OutRetAddr.getNode(), 1);
   2925 }
   2926 
   2927 /// Emit a store of the return address if tail call
   2928 /// optimization is performed and it is required (FPDiff!=0).
   2929 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
   2930                                         SDValue Chain, SDValue RetAddrFrIdx,
   2931                                         EVT PtrVT, unsigned SlotSize,
   2932                                         int FPDiff, const SDLoc &dl) {
   2933   // Store the return address to the appropriate stack slot.
   2934   if (!FPDiff) return Chain;
   2935   // Calculate the new stack slot for the return address.
   2936   int NewReturnAddrFI =
   2937     MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
   2938                                          false);
   2939   SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
   2940   Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
   2941                        MachinePointerInfo::getFixedStack(
   2942                            DAG.getMachineFunction(), NewReturnAddrFI),
   2943                        false, false, 0);
   2944   return Chain;
   2945 }
   2946 
    2947 /// Returns a vector_shuffle mask for a movs{s|d} or movd
    2948 /// operation of the specified width.
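         /// The resulting mask takes element 0 from V2 and elements 1..NumElems-1
         /// from V1.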
   2949 static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
   2950                        SDValue V2) {
   2951   unsigned NumElems = VT.getVectorNumElements();
   2952   SmallVector<int, 8> Mask;
   2953   Mask.push_back(NumElems);
   2954   for (unsigned i = 1; i != NumElems; ++i)
   2955     Mask.push_back(i);
   2956   return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
   2957 }
   2958 
   2959 SDValue
   2960 X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   2961                              SmallVectorImpl<SDValue> &InVals) const {
   2962   SelectionDAG &DAG                     = CLI.DAG;
   2963   SDLoc &dl                             = CLI.DL;
   2964   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   2965   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
   2966   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
   2967   SDValue Chain                         = CLI.Chain;
   2968   SDValue Callee                        = CLI.Callee;
   2969   CallingConv::ID CallConv              = CLI.CallConv;
   2970   bool &isTailCall                      = CLI.IsTailCall;
   2971   bool isVarArg                         = CLI.IsVarArg;
   2972 
   2973   MachineFunction &MF = DAG.getMachineFunction();
   2974   bool Is64Bit        = Subtarget.is64Bit();
   2975   bool IsWin64        = Subtarget.isCallingConvWin64(CallConv);
   2976   StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
   2977   bool IsSibcall      = false;
   2978   X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
   2979   auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
   2980 
   2981   if (CallConv == CallingConv::X86_INTR)
   2982     report_fatal_error("X86 interrupts may not be called directly");
   2983 
   2984   if (Attr.getValueAsString() == "true")
   2985     isTailCall = false;
   2986 
   2987   if (Subtarget.isPICStyleGOT() &&
   2988       !MF.getTarget().Options.GuaranteedTailCallOpt) {
   2989     // If we are using a GOT, disable tail calls to external symbols with
   2990     // default visibility. Tail calling such a symbol requires using a GOT
   2991     // relocation, which forces early binding of the symbol. This breaks code
    2992     // that requires lazy function symbol resolution. Using musttail or
   2993     // GuaranteedTailCallOpt will override this.
   2994     GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
   2995     if (!G || (!G->getGlobal()->hasLocalLinkage() &&
   2996                G->getGlobal()->hasDefaultVisibility()))
   2997       isTailCall = false;
   2998   }
   2999 
   3000   bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
   3001   if (IsMustTail) {
   3002     // Force this to be a tail call.  The verifier rules are enough to ensure
   3003     // that we can lower this successfully without moving the return address
   3004     // around.
   3005     isTailCall = true;
   3006   } else if (isTailCall) {
   3007     // Check if it's really possible to do a tail call.
   3008     isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
   3009                     isVarArg, SR != NotStructReturn,
   3010                     MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
   3011                     Outs, OutVals, Ins, DAG);
   3012 
   3013     // Sibcalls are automatically detected tailcalls which do not require
   3014     // ABI changes.
   3015     if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
   3016       IsSibcall = true;
   3017 
   3018     if (isTailCall)
   3019       ++NumTailCalls;
   3020   }
   3021 
   3022   assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
   3023          "Var args not supported with calling convention fastcc, ghc or hipe");
   3024 
   3025   // Analyze operands of the call, assigning locations to each operand.
   3026   SmallVector<CCValAssign, 16> ArgLocs;
   3027   CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
   3028 
    3029   // Allocate the 32-byte shadow (home) area required by the Win64 ABI.
   3030   if (IsWin64)
   3031     CCInfo.AllocateStack(32, 8);
   3032 
   3033   CCInfo.AnalyzeCallOperands(Outs, CC_X86);
   3034 
   3035   // Get a count of how many bytes are to be pushed on the stack.
   3036   unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
   3037   if (IsSibcall)
    3038     // This is a sibcall. The memory operands are already available in the
    3039     // caller's own caller's stack frame.
   3040     NumBytes = 0;
   3041   else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
   3042            canGuaranteeTCO(CallConv))
   3043     NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
   3044 
   3045   int FPDiff = 0;
   3046   if (isTailCall && !IsSibcall && !IsMustTail) {
   3047     // Lower arguments at fp - stackoffset + fpdiff.
   3048     unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
   3049 
   3050     FPDiff = NumBytesCallerPushed - NumBytes;
   3051 
    3052     // Set the delta of movement of the return address stack slot, but only
    3053     // if the new delta is a larger movement (a more negative FPDiff).
   3054     if (FPDiff < X86Info->getTCReturnAddrDelta())
   3055       X86Info->setTCReturnAddrDelta(FPDiff);
   3056   }
   3057 
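           // Both counts start equal to NumBytes; an inalloca argument below zeroes
           // out NumBytesToPush, since its stack space has already been allocated.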
   3058   unsigned NumBytesToPush = NumBytes;
   3059   unsigned NumBytesToPop = NumBytes;
   3060 
   3061   // If we have an inalloca argument, all stack space has already been allocated
    3062   // for us and will be right at the top of the stack.  We don't support multiple
   3063   // arguments passed in memory when using inalloca.
   3064   if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
   3065     NumBytesToPush = 0;
   3066     if (!ArgLocs.back().isMemLoc())
   3067       report_fatal_error("cannot use inalloca attribute on a register "
   3068                          "parameter");
   3069     if (ArgLocs.back().getLocMemOffset() != 0)
   3070       report_fatal_error("any parameter with the inalloca attribute must be "
   3071                          "the only memory argument");
   3072   }
   3073 
   3074   if (!IsSibcall)
   3075     Chain = DAG.getCALLSEQ_START(
   3076         Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
   3077 
   3078   SDValue RetAddrFrIdx;
   3079   // Load return address for tail calls.
   3080   if (isTailCall && FPDiff)
    3081     Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, Is64Bit, FPDiff, dl);