//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <cstdlib>

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified.  Having an FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool
X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  return hasReservedCallFrame(MF) ||
         (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}

// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function? Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the name - it resolves call frame setup/destroy
// pseudos that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool
X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
  return MF.getFrameInfo()->hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.  This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();

  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          RegInfo->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit() || MMI.callsEHReturn() ||
          MFI->hasStackMap() || MFI->hasPatchPoint());
}

static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::AND64ri8;
    return X86::AND64ri32;
  }
  if (isInt<8>(Imm))
    return X86::AND32ri8;
  return X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
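
// Illustrative (not from the original source): getSUBriOpcode(true, 8)
// selects X86::SUB64ri8, while an immediate of 4096 selects X86::SUB64ri32;
// the 32-bit variants are chosen when IsLP64 is false.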

/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const uint16_t CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}
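
// Illustrative example (not from the original source): for a 'RETQ' that
// implicitly uses EAX as the return value, every alias of EAX (AL, AX, RAX,
// ...) lands in Uses, so the scan above skips RAX and returns RDX instead.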

static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes,
                  bool Is64BitTarget, bool Is64BitStackPtr, bool UseLEA,
                  const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc;
  if (UseLEA)
    Opc = getLEArOpcode(Is64BitStackPtr);
  else
    Opc = isSub
      ? getSUBriOpcode(Is64BitStackPtr, Offset)
      : getADDriOpcode(Is64BitStackPtr, Offset);

  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    if (Offset > Chunk) {
      // Rather than emit a long series of instructions for large offsets,
      // load the offset into a register and do one sub/add.
      unsigned Reg = 0;

      if (isSub && !isEAXLiveIn(*MBB.getParent()))
        Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
      else
        Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);

      if (Reg) {
        Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
        BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
          .addImm(Offset);
        Opc = isSub
          ? getSUBrrOpcode(Is64BitTarget)
          : getADDrrOpcode(Is64BitTarget);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
          .addReg(StackPtr)
          .addReg(Reg);
        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
        Offset = 0;
        continue;
      }
    }

    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == (Is64BitTarget ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
      if (Reg) {
        Opc = isSub
          ? (Is64BitTarget ? X86::PUSH64r : X86::PUSH32r)
          : (Is64BitTarget ? X86::POP64r  : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI = nullptr;

    if (UseLEA) {
      MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                        StackPtr, false, isSub ? -ThisVal : ThisVal);
    } else {
      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
            .addReg(StackPtr)
            .addImm(ThisVal);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    }

    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);

    Offset -= ThisVal;
  }
}
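
// Illustrative usage (not from the original source): in a 64-bit prologue,
// emitSPUpdate(MBB, MBBI, X86::RSP, -40, ...) emits 'subq $40, %rsp' (or
// 'leaq -40(%rsp), %rsp' when UseLEA is set); a positive NumBytes in the
// epilogue emits the matching ADD/LEA.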

/// mergeSPUpdatesUp - If the instruction immediately before MBBI is an
/// ADD/SUB/LEA that adjusts the stack pointer, fold its adjustment into
/// *NumBytes and erase it.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = nullptr) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = std::prev(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}
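
// Illustrative example (not from the original source): if the instruction
// before MBBI is 'addq $16, %rsp' and *NumBytes is 32, the ADD is erased
// and *NumBytes becomes 48, so a single SP update can be emitted later.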

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB/LEA instruction of the stack pointer,
/// it is deleted and the stack adjustment is returned as a positive value
/// for ADD/LEA and a negative value for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
                                                       : std::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}
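
// Illustrative example (not from the original source): merging a preceding
// 'subq $24, %rsp' erases that SUB and returns -24, which callers then fold
// into the adjustment they are about to emit.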

void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  // Calculate offsets.
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex =
        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
                                                        Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}

/// usesTheStack - This function checks whether any user of EFLAGS copies it.
/// We know that the code that lowers a COPY of EFLAGS has to use the stack,
/// and if we don't adjust the stack we clobber the first frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (MachineRegisterInfo::reg_instr_iterator
       ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
       ri != re; ++ri)
    if (ri->isCopy())
      return true;

  return false;
}

void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
                                          MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          DebugLoc DL) {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  bool Is64Bit = STI.is64Bit();
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  const char *Symbol;
  if (Is64Bit) {
    if (STI.isTargetCygMing()) {
      Symbol = "___chkstk_ms";
    } else {
      Symbol = "__chkstk";
    }
  } else if (STI.isTargetCygMing())
    Symbol = "_alloca";
  else
    Symbol = "_chkstk";

  MachineInstrBuilder CI;

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && IsLargeCodeModel) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(Symbol);
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addExternalSymbol(Symbol);
  }

  unsigned AX = Is64Bit ? X86::RAX : X86::EAX;
  unsigned SP = Is64Bit ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  if (Is64Bit) {
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax, so we can reuse it when
    // adjusting %rsp.
    BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
        .addReg(X86::RSP)
        .addReg(X86::RAX);
  }
}
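
// Illustrative result (not from the original source): on 64-bit MinGW with
// the small code model this emits 'callq ___chkstk_ms' followed by
// 'subq %rax, %rsp'; with the large code model the symbol's address is first
// materialized into R11 and the call goes through that register.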

static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // The Win64 ABI allows offsets up to 240 here; 128 works equally well and
  // might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
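
// Worked examples (not from the original source): calculateSetFPREG(40)
// yields 32 (40 rounded down to 16-byte alignment), and
// calculateSetFPREG(4096) yields 128 (clamped to Win64MaxSEHOffset first).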

// If we're forcing a stack realignment we can't rely on just the frame
// info; we need to know the ABI stack alignment as well in case we have a
// call out. Otherwise just make sure we have some alignment - we'll go
// with the minimum SlotSize.
static uint64_t calculateMaxStackAlign(const MachineFunction &MF) {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned StackAlign = STI.getFrameLowering()->getStackAlignment();
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }
  return MaxAlign;
}
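
// Illustrative (not from the original source): with ForceStackAlign set, a
// function that makes calls gets MaxAlign raised to at least the ABI stack
// alignment (typically 16 on x86-64), while a call-free function is only
// guaranteed the minimum SlotSize alignment.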

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool Is64Bit = STI.is64Bit();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  bool IsWin64 = STI.isCallingConvWin64(Fn->getCallingConv());
  // Not necessarily synonymous with IsWin64.
  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
  bool NeedsDwarfCFI =
      !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
  bool UseLEA = STI.useLeaForSP();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  const unsigned MachineFramePtr =
      STI.isTarget64BitILP32()
          ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
          : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();
  unsigned BasePtr = RegInfo->getBaseRegister();
  DebugLoc DL;

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta && IsWinEH)
    report_fatal_error("Can't handle guaranteed tail call under win64 yet");

  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  bool UseStackProbe = (STI.isOSWindows() && !STI.isTargetMachO());

  // The default stack probe size is 4096 if the function has no
  // "stack-probe-size" attribute.
  unsigned StackProbeSize = 4096;
  if (Fn->hasFnAttribute("stack-probe-size"))
    Fn->getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);

  // If this is x86-64, the Red Zone is not disabled, and this is a leaf
  // function that uses up to 128 bytes of stack space and has no frame
  // pointer, calls, or dynamic allocas, then we do not need to adjust the
  // stack pointer (we fit in the Red Zone). We also check that we don't
  // push and pop from the stack.
  if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() && // No dynamic alloca.
      !MFI->adjustsStack() &&       // No calls.
      !IsWin64 &&                   // Win64 has no Red Zone.
      !usesTheStack(MF) &&          // Don't push and pop.
      !MF.shouldSplitStack()) {     // Regular stack.
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }
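
  // Illustrative (not from the original source): assuming no callee-saved
  // spills, a frameless leaf with StackSize == 100 shrinks to 0 (it lives
  // entirely in the red zone), while StackSize == 200 shrinks to 72 bytes
  // of real adjustment.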

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Uses64BitFramePtr, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta)
        .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for an extra hidden slot for stashing the
    // base pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Callee-saved registers are pushed on stack before the stack is realigned.
    if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
      NumBytes = RoundUpToAlignment(NumBytes, MaxAlign);

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(MachineFramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr,
                                         DwarfFramePtr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (!IsWinEH) {
      // Update EBP with the new base value.
      BuildMI(MBB, MBBI, DL,
              TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
              FramePtr)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (NeedsDwarfCFI) {
      // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(MachineFramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Mark the FramePtr as live-in in every block.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      I->addLiveIn(MachineFramePtr);
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    unsigned Reg = MBBI->getOperand(0).getReg();
    ++MBBI;

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      StackOffset += stackGrowth;
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
          MachineInstr::FrameSetup);
    }
  }

  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer). Don't do this
  // for Win64; it realigns the stack after the prologue.
  if (!IsWinEH && RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    uint64_t Val = -MaxAlign;
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(Val)
            .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  uint64_t AlignedNumBytes = NumBytes;
  if (IsWinEH && RegInfo->needsStackRealignment(MF))
    AlignedNumBytes = RoundUpToAlignment(AlignedNumBytes, MaxAlign);
  if (AlignedNumBytes >= StackProbeSize && UseStackProbe) {
    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // Sanity check: EAX must not be live-in on 64-bit targets, where the
      // probe sequence below would clobber RAX.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      if (isUInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else if (isInt<32>(NumBytes)) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      // Allocate NumBytes-4 bytes on the stack when EAX is live: the 4 bytes
      // already allocated by the push above will be reused for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    // Save a pointer to the MI where we set AX.
    MachineBasicBlock::iterator SetRAX = MBBI;
    --SetRAX;

    // Call __chkstk, __chkstk_ms, or __alloca.
    emitStackProbeCall(MF, MBB, MBBI, DL);

    // Apply the frame setup flag to all inserted instrs.
    for (; SetRAX != MBBI; ++SetRAX)
      SetRAX->setFlag(MachineInstr::FrameSetup);

    if (isEAXAlive) {
      // Restore EAX.
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                 Uses64BitFramePtr, UseLEA, TII, *RegInfo);
  }

  if (NeedsWinEH && NumBytes)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);

  int SEHFrameOffset = 0;
  if (IsWinEH && HasFP) {
    SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (SEHFrameOffset)
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                   StackPtr, false, SEHFrameOffset);
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr).addReg(StackPtr);

    if (NeedsWinEH)
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
  }

  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
    const MachineInstr *FrameInstr = &*MBBI;
    ++MBBI;

    if (NeedsWinEH) {
      int FI;
      if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
        if (X86::FR64RegClass.contains(Reg)) {
          int Offset = getFrameIndexOffset(MF, FI);
          Offset += SEHFrameOffset;

          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
              .addImm(Reg)
              .addImm(Offset)
              .setMIFlag(MachineInstr::FrameSetup);
        }
      }
    }
  }

  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);

  // Realign stack after we spilled callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Win64 requires aligning the stack after the prologue.
  if (IsWinEH && RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    uint64_t Val = -MaxAlign;
    MachineInstr *MI =
        BuildMI(MBB, MBBI, DL, TII.get(getANDriOpcode(Uses64BitFramePtr, Val)),
                StackPtr)
            .addReg(StackPtr)
            .addImm(Val)
            .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);
    if (X86FI->getRestoreBasePointer()) {
      // Stash the value of the base pointer. Saving RSP instead of EBP
      // shortens the dependence chain.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)),
                   FramePtr, true, X86FI->getRestoreBasePointerOffset())
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr,
                                               -StackSize + stackGrowth));

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86RegisterInfo *RegInfo = STI.getRegisterInfo();
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  const bool Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  bool HasFP = hasFP(MF);
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned MachineFramePtr =
      Is64BitILP32 ? getX86SubSuperRegister(FramePtr, MVT::i64, false)
                   : FramePtr;
  unsigned StackPtr = RegInfo->getStackRegister();

  bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWinEH = IsWinEH && MF.getFunction()->needsUnwindTableEntry();
  bool UseLEAForSP = false;

  // We can't use LEA instructions for adjusting the stack pointer if this is
  // a frameless function in the Win64 ABI.  Only ADD instructions may be used
  // to deallocate the stack.
  if (STI.useLeaForSP()) {
    if (!IsWinEH) {
      // We *aren't* using the Win64 ABI which means we are free to use LEA.
      UseLEAForSP = true;
    } else if (HasFP) {
      // We *have* a frame pointer which means we are permitted to use LEA.
      UseLEAForSP = true;
    }
  }

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilogue into returning blocks");
  case X86::RETQ:
  case X86::RETL:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok.
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (RegInfo->needsStackRealignment(MF) && !IsWinEH)
      NumBytes = RoundUpToAlignment(FrameSize, MaxAlign);

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), MachineFramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }
  uint64_t SEHStackAllocAmt = NumBytes;

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset ESP to point to the last
  // callee-saved slot before popping them off! The same applies when the
  // stack was realigned.
  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (RegInfo->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
    uint64_t LEAAmount = IsWinEH ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;

    // There are only two legal forms of epilogue:
    // - add SEHAllocationSize, %rsp
    // - lea SEHAllocationSize(%FramePtr), %rsp
    //
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
    // However, we may use this sequence if we have a frame pointer because the
    // effects of the prologue can safely be undone.
    if (LEAAmount != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, LEAAmount);
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
    --MBBI;
  }
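
  // Illustrative example (not from the original source): on a non-Win64
  // 64-bit target with CSSize == 16, a variable-sized alloca, and RBP as the
  // frame pointer, the branch above emits 'leaq -16(%rbp), %rsp' so the
  // callee-saved pops read from the correct slots.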

  // The Windows unwinder will not invoke a function's exception handler if
  // the IP is either in the prologue or in the epilogue.  This behavior
  // causes a problem when a call immediately precedes an epilogue, because
  // the return address points into the epilogue.  To cope with that, we
  // insert an epilogue marker here, then replace it with a 'nop' if it ends
  // up immediately after a CALL in the final emitted code.
  if (NeedsWinEH)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  // We're returning from the function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr  = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, Uses64BitFramePtr,
                   UseLEAForSP, TII, *RegInfo);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI.isTargetWin64();
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      unsigned Op = (RetOpcode == X86::TCRETURNdi)
                        ? X86::TAILJMPd
                        : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      unsigned Op = (RetOpcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII.get(Op));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL,
              TII.get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr))
          .addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = std::prev(MBBI);
    NewMI->copyImplicitOps(MF, MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
              RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1 * X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, Uses64BitFramePtr,
                 UseLEAForSP, TII, *RegInfo);
  }
}
   1199 
   1200 int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
   1201                                           int FI) const {
   1202   const X86RegisterInfo *RegInfo =
   1203       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   1204   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1205   // Offset will hold the offset from the stack pointer at function entry to the
   1206   // object.
   1207   // We need to factor in additional offsets applied during the prologue to the
   1208   // frame, base, and stack pointer depending on which is used.
   1209   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
   1210   const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   1211   unsigned CSSize = X86FI->getCalleeSavedFrameSize();
   1212   uint64_t StackSize = MFI->getStackSize();
   1213   unsigned SlotSize = RegInfo->getSlotSize();
   1214   bool HasFP = hasFP(MF);
   1215   bool IsWinEH = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
   1216   int64_t FPDelta = 0;
   1217 
   1218   if (IsWinEH) {
   1219     assert(!MFI->hasCalls() || (StackSize % 16) == 8);
   1220 
   1221     // Calculate required stack adjustment.
   1222     uint64_t FrameSize = StackSize - SlotSize;
   1223     // If required, include space for extra hidden slot for stashing base pointer.
   1224     if (X86FI->getRestoreBasePointer())
   1225       FrameSize += SlotSize;
   1226     uint64_t NumBytes = FrameSize - CSSize;
   1227 
   1228     uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
   1229     if (FI && FI == X86FI->getFAIndex())
   1230       return -SEHFrameOffset;
   1231 
   1232     // FPDelta is the offset from the "traditional" FP location of the old base
   1233     // pointer followed by return address and the location required by the
   1234     // restricted Win64 prologue.
   1235     // Add FPDelta to all offsets below that go through the frame pointer.
   1236     FPDelta = FrameSize - SEHFrameOffset;
   1237     assert((!MFI->hasCalls() || (FPDelta % 16) == 0) &&
   1238            "FPDelta isn't aligned per the Win64 ABI!");
   1239   }
   1240 
   1242   if (RegInfo->hasBasePointer(MF)) {
   1243     assert(HasFP && "VLAs and dynamic stack realign, but no FP?!");
   1244     if (FI < 0) {
   1245       // Skip the saved EBP.
   1246       return Offset + SlotSize + FPDelta;
   1247     } else {
   1248       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
   1249       return Offset + StackSize;
   1250     }
   1251   } else if (RegInfo->needsStackRealignment(MF)) {
   1252     if (FI < 0) {
   1253       // Skip the saved EBP.
   1254       return Offset + SlotSize + FPDelta;
   1255     } else {
   1256       assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
   1257       return Offset + StackSize;
   1258     }
   1259     // FIXME: Support tail calls
   1260   } else {
   1261     if (!HasFP)
   1262       return Offset + StackSize;
   1263 
   1264     // Skip the saved EBP.
   1265     Offset += SlotSize;
   1266 
   1267     // Skip the RETADDR move area
   1268     int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
   1269     if (TailCallReturnAddrDelta < 0)
   1270       Offset -= TailCallReturnAddrDelta;
   1271   }
   1272 
   1273   return Offset + FPDelta;
   1274 }
   1275 
   1276 int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   1277                                              unsigned &FrameReg) const {
   1278   const X86RegisterInfo *RegInfo =
   1279       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   1280   // We can't calculate offset from frame pointer if the stack is realigned,
   1281   // so enforce usage of stack/base pointer.  The base pointer is used when we
   1282   // have dynamic allocas in addition to dynamic realignment.
   1283   if (RegInfo->hasBasePointer(MF))
   1284     FrameReg = RegInfo->getBaseRegister();
   1285   else if (RegInfo->needsStackRealignment(MF))
   1286     FrameReg = RegInfo->getStackRegister();
   1287   else
   1288     FrameReg = RegInfo->getFrameRegister(MF);
   1289   return getFrameIndexOffset(MF, FI);
   1290 }
   1291 
   1292 // Simplified from getFrameIndexOffset, keeping only the StackPointer cases.
   1293 int X86FrameLowering::getFrameIndexOffsetFromSP(const MachineFunction &MF, int FI) const {
   1294   const MachineFrameInfo *MFI = MF.getFrameInfo();
   1295   // Does not include any dynamic realign.
   1296   const uint64_t StackSize = MFI->getStackSize();
   1297   {
   1298 #ifndef NDEBUG
   1299     const X86RegisterInfo *RegInfo =
   1300         MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   1301     // Note: LLVM arranges the stack as:
   1302     // Args > Saved RetPC (<--FP) > CSRs > dynamic alignment (<--BP)
   1303     //      > "Stack Slots" (<--SP)
   1304     // We can always address StackSlots from RSP.  We can usually (unless
   1305     // needsStackRealignment) address CSRs from RSP, but sometimes need to
   1306     // address them from RBP.  FixedObjects can be placed anywhere in the stack
   1307     // frame depending on their specific requirements (i.e. we can actually
   1308     // refer to arguments to the function which are stored in the *caller's*
   1309     // frame).  As a result, THE RESULT OF THIS CALL IS MEANINGLESS FOR CSRs
   1310     // AND FixedObjects IFF needsStackRealignment or hasVarSizedObjects.
   1311 
   1312     assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
   1313 
   1314     // We don't handle tail calls, and shouldn't be seeing them
   1315     // either.
   1316     int TailCallReturnAddrDelta =
   1317         MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta();
   1318     assert(!(TailCallReturnAddrDelta < 0) && "we don't handle this case!");
   1319 #endif
   1320   }
   1321 
   1322   // This is how the math works out:
   1323   //
   1324   //  %rsp grows (i.e. gets lower) left to right. Each box below is
   1325   //  one word (eight bytes).  Obj0 is the stack slot we're trying to
   1326   //  get to.
   1327   //
   1328   //    ----------------------------------
   1329   //    | BP | Obj0 | Obj1 | ... | ObjN |
   1330   //    ----------------------------------
   1331   //    ^    ^      ^                   ^
   1332   //    A    B      C                   E
   1333   //
   1334   // A is the incoming stack pointer.
   1335   // (B - A) is the local area offset (-8 for x86-64) [1]
   1336   // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
   1337   //
   1338   // |(E - B)| is the StackSize (absolute value, positive).  For a
   1339   // stack that grows down, this works out to be (B - E). [3]
   1340   //
   1341   // E is also the value of %rsp after stack has been set up, and we
   1342   // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
   1343   // (C - E) == (C - A) - (B - A) + (B - E)
   1344   //            { Using [1], [2] and [3] above }
   1345   //         == getObjectOffset - LocalAreaOffset + StackSize
   1346   //
   1347 
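  // Worked example (illustrative numbers only): with LocalAreaOffset = -8,
  // getObjectOffset(Obj0) = -24 and StackSize = 40, the offset from %rsp is
  // (-24) - (-8) + 40 = 24, i.e. Obj0 lives at [%rsp + 24] once the prologue
  // has run.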
   1348   // Get the Offset from the StackPointer
   1349   int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
   1350 
   1351   return Offset + StackSize;
   1352 }
   1353 // Simplified from getFrameIndexReference, keeping only the StackPointer cases.
   1354 int X86FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
   1355                                                    int FI,
   1356                                                    unsigned &FrameReg) const {
   1357   const X86RegisterInfo *RegInfo =
   1358       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   1359   assert(!RegInfo->hasBasePointer(MF) && "we don't handle this case");
   1360 
   1361   FrameReg = RegInfo->getStackRegister();
   1362   return getFrameIndexOffsetFromSP(MF, FI);
   1363 }
   1364 
   1365 bool X86FrameLowering::assignCalleeSavedSpillSlots(
   1366     MachineFunction &MF, const TargetRegisterInfo *TRI,
   1367     std::vector<CalleeSavedInfo> &CSI) const {
   1368   MachineFrameInfo *MFI = MF.getFrameInfo();
   1369   const X86RegisterInfo *RegInfo =
   1370       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   1371   unsigned SlotSize = RegInfo->getSlotSize();
   1372   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   1373 
   1374   unsigned CalleeSavedFrameSize = 0;
   1375   int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();
   1376 
   1377   if (hasFP(MF)) {
   1378     // emitPrologue always spills the frame register first.
   1379     SpillSlotOffset -= SlotSize;
   1380     MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
   1381 
   1382     // Since emitPrologue and emitEpilogue will handle spilling and restoring of
   1383     // the frame register, we can delete it from the CSI list and not worry
   1384     // about avoiding it later.
   1385     unsigned FPReg = RegInfo->getFrameRegister(MF);
   1386     for (unsigned i = 0; i < CSI.size(); ++i) {
   1387       if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
   1388         CSI.erase(CSI.begin() + i);
   1389         break;
   1390       }
   1391     }
   1392   }
   1393 
   1394   // Assign slots for GPRs. This increases the frame size.
   1395   for (unsigned i = CSI.size(); i != 0; --i) {
   1396     unsigned Reg = CSI[i - 1].getReg();
   1397 
   1398     if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
   1399       continue;
   1400 
   1401     SpillSlotOffset -= SlotSize;
   1402     CalleeSavedFrameSize += SlotSize;
   1403 
   1404     int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
   1405     CSI[i - 1].setFrameIdx(SlotIndex);
   1406   }
   1407 
   1408   X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
   1409 
   1410   // Assign slots for XMMs.
   1411   for (unsigned i = CSI.size(); i != 0; --i) {
   1412     unsigned Reg = CSI[i - 1].getReg();
   1413     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
   1414       continue;
   1415 
   1416     const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
   1417     // Ensure the slot is aligned for this register class.
   1418     SpillSlotOffset -= std::abs(SpillSlotOffset) % RC->getAlignment();
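    // E.g. (illustrative): with SpillSlotOffset = -40 and a 16-byte-aligned
    // class, this subtracts 40 % 16 == 8 and moves the offset to -48.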
   1419     // Allocate the spill slot itself.
   1420     SpillSlotOffset -= RC->getSize();
   1421     int SlotIndex =
   1422         MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
   1423     CSI[i - 1].setFrameIdx(SlotIndex);
   1424     MFI->ensureMaxAlignment(RC->getAlignment());
   1425   }
   1426 
   1427   return true;
   1428 }
   1429 
   1430 bool X86FrameLowering::spillCalleeSavedRegisters(
   1431     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
   1432     const std::vector<CalleeSavedInfo> &CSI,
   1433     const TargetRegisterInfo *TRI) const {
   1434   DebugLoc DL = MBB.findDebugLoc(MI);
   1435 
   1436   MachineFunction &MF = *MBB.getParent();
   1437   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   1438   const TargetInstrInfo &TII = *STI.getInstrInfo();
   1439 
   1440   // Push GPRs. It increases frame size.
   1441   unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
   1442   for (unsigned i = CSI.size(); i != 0; --i) {
   1443     unsigned Reg = CSI[i - 1].getReg();
   1444 
   1445     if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
   1446       continue;
   1447     // Add the callee-saved register as live-in. It's killed at the spill.
   1448     MBB.addLiveIn(Reg);
   1449 
   1450     BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
   1451       .setMIFlag(MachineInstr::FrameSetup);
   1452   }
   1453 
   1454   // Spill XMM registers. X86 has no push/pop instructions for XMM
   1455   // registers, so spill them to slots in the stack frame instead.
   1456   for (unsigned i = CSI.size(); i != 0; --i) {
   1457     unsigned Reg = CSI[i-1].getReg();
   1458     if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
   1459       continue;
   1460     // Add the callee-saved register as live-in. It's killed at the spill.
   1461     MBB.addLiveIn(Reg);
   1462     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
   1463 
   1464     TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
   1465                             TRI);
   1466     --MI;
   1467     MI->setFlag(MachineInstr::FrameSetup);
   1468     ++MI;
   1469   }
   1470 
   1471   return true;
   1472 }
   1473 
   1474 bool X86FrameLowering::restoreCalleeSavedRegisters(
   1475     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
   1476     const std::vector<CalleeSavedInfo> &CSI,
   1477     const TargetRegisterInfo *TRI) const {
   1478   if (CSI.empty())
   1479     return false;
   1480 
   1481   DebugLoc DL = MBB.findDebugLoc(MI);
   1482 
   1483   MachineFunction &MF = *MBB.getParent();
   1484   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   1485   const TargetInstrInfo &TII = *STI.getInstrInfo();
   1486 
   1487   // Reload XMMs from stack frame.
   1488   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
   1489     unsigned Reg = CSI[i].getReg();
   1490     if (X86::GR64RegClass.contains(Reg) ||
   1491         X86::GR32RegClass.contains(Reg))
   1492       continue;
   1493 
   1494     const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
   1495     TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
   1496   }
   1497 
   1498   // POP GPRs.
   1499   unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
   1500   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
   1501     unsigned Reg = CSI[i].getReg();
   1502     if (!X86::GR64RegClass.contains(Reg) &&
   1503         !X86::GR32RegClass.contains(Reg))
   1504       continue;
   1505 
   1506     BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
   1507   }
   1508   return true;
   1509 }
   1510 
   1511 void
   1512 X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   1513                                                        RegScavenger *RS) const {
   1514   MachineFrameInfo *MFI = MF.getFrameInfo();
   1515   const X86RegisterInfo *RegInfo =
   1516       MF.getSubtarget<X86Subtarget>().getRegisterInfo();
   1517   unsigned SlotSize = RegInfo->getSlotSize();
   1518 
   1519   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   1520   int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
   1521 
   1522   if (TailCallReturnAddrDelta < 0) {
   1523     // Create the RETURNADDR area:
   1524     //   arg
   1525     //   arg
   1526     //   RETADDR
   1527     //   { ...
   1528     //     RETADDR area
   1529     //     ...
   1530     //   }
   1531     //   [EBP]
   1532     MFI->CreateFixedObject(-TailCallReturnAddrDelta,
   1533                            TailCallReturnAddrDelta - SlotSize, true);
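    // For instance (illustrative): a delta of -8 on x86-64 creates an 8-byte
    // fixed object at offset -16, one slot below the return address.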
   1534   }
   1535 
   1536   // Spill the BasePtr if it's used.
   1537   if (RegInfo->hasBasePointer(MF))
   1538     MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
   1539 }
   1540 
   1541 static bool
   1542 HasNestArgument(const MachineFunction *MF) {
   1543   const Function *F = MF->getFunction();
   1544   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
   1545        I != E; I++) {
   1546     if (I->hasNestAttr())
   1547       return true;
   1548   }
   1549   return false;
   1550 }
   1551 
   1552 /// GetScratchRegister - Get a temp register for performing work in the
   1553 /// segmented stack and the Erlang/HiPE stack prologue. Depending on the
   1554 /// platform and the properties of the function, either one or two registers
   1555 /// will be needed. Set Primary to true for the first register, false for the second.
   1556 static unsigned
   1557 GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
   1558   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
   1559 
   1560   // Erlang/HiPE calling convention.
   1561   if (CallingConvention == CallingConv::HiPE) {
   1562     if (Is64Bit)
   1563       return Primary ? X86::R14 : X86::R13;
   1564     else
   1565       return Primary ? X86::EBX : X86::EDI;
   1566   }
   1567 
   1568   if (Is64Bit) {
   1569     if (IsLP64)
   1570       return Primary ? X86::R11 : X86::R12;
   1571     else
   1572       return Primary ? X86::R11D : X86::R12D;
   1573   }
   1574 
   1575   bool IsNested = HasNestArgument(&MF);
   1576 
   1577   if (CallingConvention == CallingConv::X86_FastCall ||
   1578       CallingConvention == CallingConv::Fast) {
   1579     if (IsNested)
   1580       report_fatal_error("Segmented stacks does not support fastcall with "
   1581                          "nested function.");
   1582     return Primary ? X86::EAX : X86::ECX;
   1583   }
   1584   if (IsNested)
   1585     return Primary ? X86::EDX : X86::EAX;
   1586   return Primary ? X86::ECX : X86::EAX;
   1587 }
   1588 
   1589 // The stack limit in the TCB is set to this many bytes above the actual stack
   1590 // limit.
   1591 static const uint64_t kSplitStackAvailable = 256;
   1592 
   1593 void
   1594 X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
   1595   MachineBasicBlock &prologueMBB = MF.front();
   1596   MachineFrameInfo *MFI = MF.getFrameInfo();
   1597   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   1598   const TargetInstrInfo &TII = *STI.getInstrInfo();
   1599   uint64_t StackSize;
   1600   bool Is64Bit = STI.is64Bit();
   1601   const bool IsLP64 = STI.isTarget64BitLP64();
   1602   unsigned TlsReg, TlsOffset;
   1603   DebugLoc DL;
   1604 
   1605   unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
   1606   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
   1607          "Scratch register is live-in");
   1608 
   1609   if (MF.getFunction()->isVarArg())
   1610     report_fatal_error("Segmented stacks do not support vararg functions.");
   1611   if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
   1612       !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
   1613       !STI.isTargetDragonFly())
   1614     report_fatal_error("Segmented stacks not supported on this platform.");
   1615 
   1616   // Eventually StackSize will be calculated by a link-time pass, which will
   1617   // also decide whether checking code needs to be injected into this
   1618   // particular prologue.
   1619   StackSize = MFI->getStackSize();
   1620 
   1621   // Do not generate a prologue for functions with a stack frame of size zero.
   1622   if (StackSize == 0)
   1623     return;
   1624 
   1625   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
   1626   MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
   1627   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   1628   bool IsNested = false;
   1629 
   1630   // We only need to know whether the function has a nest argument in 64-bit mode.
   1631   if (Is64Bit)
   1632     IsNested = HasNestArgument(&MF);
   1633 
   1634   // The MOV R10, RAX needs to be in a different block, since the RET we emit
   1635   // in allocMBB needs to be the last (terminating) instruction.
   1636 
   1637   for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
   1638          e = prologueMBB.livein_end(); i != e; i++) {
   1639     allocMBB->addLiveIn(*i);
   1640     checkMBB->addLiveIn(*i);
   1641   }
   1642 
   1643   if (IsNested)
   1644     allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
   1645 
   1646   MF.push_front(allocMBB);
   1647   MF.push_front(checkMBB);
   1648 
   1649   // When the frame size is less than 256 bytes, we just compare the stack
   1650   // boundary directly to the value of the stack pointer, per gcc.
   1651   bool CompareStackPointer = StackSize < kSplitStackAvailable;
   1652 
   1653   // Read the limit of the current stacklet from the stack_guard location.
   1654   if (Is64Bit) {
   1655     if (STI.isTargetLinux()) {
   1656       TlsReg = X86::FS;
   1657       TlsOffset = IsLP64 ? 0x70 : 0x40;
   1658     } else if (STI.isTargetDarwin()) {
   1659       TlsReg = X86::GS;
   1660       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
   1661     } else if (STI.isTargetWin64()) {
   1662       TlsReg = X86::GS;
   1663       TlsOffset = 0x28; // pvArbitrary, reserved for application use
   1664     } else if (STI.isTargetFreeBSD()) {
   1665       TlsReg = X86::FS;
   1666       TlsOffset = 0x18;
   1667     } else if (STI.isTargetDragonFly()) {
   1668       TlsReg = X86::FS;
   1669       TlsOffset = 0x20; // use tls_tcb.tcb_segstack
   1670     } else {
   1671       report_fatal_error("Segmented stacks not supported on this platform.");
   1672     }
   1673 
   1674     if (CompareStackPointer)
   1675       ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
   1676     else
   1677       BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
   1678         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
   1679 
   1680     BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
   1681       .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
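    // For the common Linux/LP64 case, the two instructions built above
    // correspond roughly to (illustrative AT&T syntax, not exact output):
    //   leaq -StackSize(%rsp), %r11
    //   cmpq %fs:0x70, %r11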
   1682   } else {
   1683     if (STI.isTargetLinux()) {
   1684       TlsReg = X86::GS;
   1685       TlsOffset = 0x30;
   1686     } else if (STI.isTargetDarwin()) {
   1687       TlsReg = X86::GS;
   1688       TlsOffset = 0x48 + 90*4;
   1689     } else if (STI.isTargetWin32()) {
   1690       TlsReg = X86::FS;
   1691       TlsOffset = 0x14; // pvArbitrary, reserved for application use
   1692     } else if (STI.isTargetDragonFly()) {
   1693       TlsReg = X86::FS;
   1694       TlsOffset = 0x10; // use tls_tcb.tcb_segstack
   1695     } else if (STI.isTargetFreeBSD()) {
   1696       report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
   1697     } else {
   1698       report_fatal_error("Segmented stacks not supported on this platform.");
   1699     }
   1700 
   1701     if (CompareStackPointer)
   1702       ScratchReg = X86::ESP;
   1703     else
   1704       BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
   1705         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
   1706 
   1707     if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
   1708         STI.isTargetDragonFly()) {
   1709       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
   1710         .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
   1711     } else if (STI.isTargetDarwin()) {
   1712 
   1713       // TlsOffset doesn't fit into a mod r/m byte, so we need an extra register.
   1714       unsigned ScratchReg2;
   1715       bool SaveScratch2;
   1716       if (CompareStackPointer) {
   1717         // The primary scratch register is available for holding the TLS offset.
   1718         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
   1719         SaveScratch2 = false;
   1720       } else {
   1721         // Need to use a second register to hold the TLS offset
   1722         ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
   1723 
   1724         // Unfortunately, with fastcc the second scratch register may hold an
   1725         // argument.
   1726         SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
   1727       }
   1728 
   1729       // If Scratch2 is live-in then it needs to be saved.
   1730       assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
   1731              "Scratch register is live-in and not saved");
   1732 
   1733       if (SaveScratch2)
   1734         BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
   1735           .addReg(ScratchReg2, RegState::Kill);
   1736 
   1737       BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
   1738         .addImm(TlsOffset);
   1739       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
   1740         .addReg(ScratchReg)
   1741         .addReg(ScratchReg2).addImm(1).addReg(0)
   1742         .addImm(0)
   1743         .addReg(TlsReg);
   1744 
   1745       if (SaveScratch2)
   1746         BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
   1747     }
   1748   }
   1749 
   1750   // This jump is taken if SP > (Stacklet Limit + Stack Space required).
   1751   // It jumps to normal execution of the function body.
   1752   BuildMI(checkMBB, DL, TII.get(X86::JA_1)).addMBB(&prologueMBB);
   1753 
   1754   // On 32-bit targets we first push the argument size and then the frame size.
   1755   // On 64-bit, we pass the frame size in r10 and the argument size in r11.
   1756   if (Is64Bit) {
   1757     // Functions with nested arguments use R10, so it needs to be saved across
   1758     // the call to __morestack.
   1759 
   1760     const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
   1761     const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
   1762     const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
   1763     const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
   1764     const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
   1765 
   1766     if (IsNested)
   1767       BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
   1768 
   1769     BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
   1770       .addImm(StackSize);
   1771     BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
   1772       .addImm(X86FI->getArgumentStackSize());
   1773     MF.getRegInfo().setPhysRegUsed(Reg10);
   1774     MF.getRegInfo().setPhysRegUsed(Reg11);
   1775   } else {
   1776     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
   1777       .addImm(X86FI->getArgumentStackSize());
   1778     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
   1779       .addImm(StackSize);
   1780   }
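  // E.g. (illustrative) the 32-bit branch above emits, roughly:
  //   pushl $ArgumentStackSize
  //   pushl $StackSize
  // followed by the call to __morestack built below.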
   1781 
   1782   // __morestack is in libgcc.
   1783   if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
   1784     // Under the large code model, we cannot assume that __morestack lives
   1785     // within 2^31 bytes of the call site, so we cannot use pc-relative
   1786     // addressing. We cannot perform the call via a temporary register,
   1787     // as the rax register may be used to store the static chain, and all
   1788     // other suitable registers may be either callee-save or used for
   1789     // parameter passing. We cannot use the stack at this point either
   1790     // because __morestack manipulates the stack directly.
   1791     //
   1792     // To avoid these issues, perform an indirect call via a read-only memory
   1793     // location containing the address.
   1794     //
   1795     // This solution is not perfect, as it assumes that the .rodata section
   1796     // is laid out within 2^31 bytes of each function body, but this seems
   1797     // to be sufficient for JIT.
   1798     BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
   1799         .addReg(X86::RIP)
   1800         .addImm(0)
   1801         .addReg(0)
   1802         .addExternalSymbol("__morestack_addr")
   1803         .addReg(0);
   1804     MF.getMMI().setUsesMorestackAddr(true);
   1805   } else {
   1806     if (Is64Bit)
   1807       BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
   1808         .addExternalSymbol("__morestack");
   1809     else
   1810       BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
   1811         .addExternalSymbol("__morestack");
   1812   }
   1813 
   1814   if (IsNested)
   1815     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
   1816   else
   1817     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
   1818 
   1819   allocMBB->addSuccessor(&prologueMBB);
   1820 
   1821   checkMBB->addSuccessor(allocMBB);
   1822   checkMBB->addSuccessor(&prologueMBB);
   1823 
   1824 #ifdef XDEBUG
   1825   MF.verify();
   1826 #endif
   1827 }
   1828 
   1829 /// Erlang programs may need a special prologue to handle the stack size they
   1830 /// might need at runtime. That is because Erlang/OTP does not implement a C
   1831 /// stack but uses a custom implementation of hybrid stack/heap architecture.
   1832 /// (for more information see Eric Stenman's Ph.D. thesis:
   1833 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
   1834 ///
   1835 /// CheckStack:
   1836 ///       temp0 = sp - MaxStack
   1837 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
   1838 /// OldStart:
   1839 ///       ...
   1840 /// IncStack:
   1841 ///       call inc_stack   # doubles the stack space
   1842 ///       temp0 = sp - MaxStack
   1843 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
   1844 void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
   1845   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   1846   const TargetInstrInfo &TII = *STI.getInstrInfo();
   1847   MachineFrameInfo *MFI = MF.getFrameInfo();
   1848   const unsigned SlotSize = STI.getRegisterInfo()->getSlotSize();
   1849   const bool Is64Bit = STI.is64Bit();
   1850   const bool IsLP64 = STI.isTarget64BitLP64();
   1851   DebugLoc DL;
   1852   // HiPE-specific values
   1853   const unsigned HipeLeafWords = 24;
   1854   const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
   1855   const unsigned Guaranteed = HipeLeafWords * SlotSize;
   1856   unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
   1857                             MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
   1858   unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
   1859 
   1860   assert(STI.isTargetLinux() &&
   1861          "HiPE prologue is only supported on Linux operating systems.");
   1862 
   1863   // Compute the largest caller's frame that is needed to fit the callees'
   1864   // frames. This 'MaxStack' is computed from:
   1865   //
   1866   // a) the fixed frame size, which is the space needed for all spilled temps,
   1867   // b) outgoing on-stack parameter areas, and
   1868   // c) the minimum stack space this function needs to make available for the
   1869   //    functions it calls (a tunable ABI property).
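  // For example (illustrative, x86-64): a function with StackSize = 40 and 8
  // formal arguments has CallerStkArity = 8 - 6 = 2, so MaxStack starts out
  // as 40 + 2*8 + 8 = 64 bytes before the call scan below.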
   1870   if (MFI->hasCalls()) {
   1871     unsigned MoreStackForCalls = 0;
   1872 
   1873     for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
   1874          MBBI != MBBE; ++MBBI)
   1875       for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
   1876            MI != ME; ++MI) {
   1877         if (!MI->isCall())
   1878           continue;
   1879 
   1880         // Get callee operand.
   1881         const MachineOperand &MO = MI->getOperand(0);
   1882 
   1883         // Only take into account global function calls (no closures, etc.).
   1884         if (!MO.isGlobal())
   1885           continue;
   1886 
   1887         const Function *F = dyn_cast<Function>(MO.getGlobal());
   1888         if (!F)
   1889           continue;
   1890 
   1891         // Do not update 'MaxStack' for primitive and built-in functions,
   1892         // since they execute on another stack. Such functions are encoded
   1893         // with names either starting with "erlang." or "bif_", or containing
   1894         // neither a "." (as a regular <Module>.<Function>.<Arity> would) nor
   1895         // an "_" (as the BIF "suspend_0" does).
   1896         if (F->getName().find("erlang.") != StringRef::npos ||
   1897             F->getName().find("bif_") != StringRef::npos ||
   1898             F->getName().find_first_of("._") == StringRef::npos)
   1899           continue;
   1900 
   1901         unsigned CalleeStkArity =
   1902           F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
   1903         if (HipeLeafWords - 1 > CalleeStkArity)
   1904           MoreStackForCalls = std::max(MoreStackForCalls,
   1905                                (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
   1906       }
   1907     MaxStack += MoreStackForCalls;
   1908   }
   1909 
   1910   // If the stack frame needed is larger than the guaranteed size, then runtime
   1911   // checks and calls to the "inc_stack_0" BIF are inserted in the prologue.
   1912   if (MaxStack > Guaranteed) {
   1913     MachineBasicBlock &prologueMBB = MF.front();
   1914     MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
   1915     MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
   1916 
   1917     for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
   1918            E = prologueMBB.livein_end(); I != E; I++) {
   1919       stackCheckMBB->addLiveIn(*I);
   1920       incStackMBB->addLiveIn(*I);
   1921     }
   1922 
   1923     MF.push_front(incStackMBB);
   1924     MF.push_front(stackCheckMBB);
   1925 
   1926     unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
   1927     unsigned LEAop, CMPop, CALLop;
   1928     if (Is64Bit) {
   1929       SPReg = X86::RSP;
   1930       PReg  = X86::RBP;
   1931       LEAop = X86::LEA64r;
   1932       CMPop = X86::CMP64rm;
   1933       CALLop = X86::CALL64pcrel32;
   1934       SPLimitOffset = 0x90;
   1935     } else {
   1936       SPReg = X86::ESP;
   1937       PReg  = X86::EBP;
   1938       LEAop = X86::LEA32r;
   1939       CMPop = X86::CMP32rm;
   1940       CALLop = X86::CALLpcrel32;
   1941       SPLimitOffset = 0x4c;
   1942     }
   1943 
   1944     ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
   1945     assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
   1946            "HiPE prologue scratch register is live-in");
   1947 
   1948     // Create new MBB for StackCheck:
   1949     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
   1950                  SPReg, false, -MaxStack);
   1951     // SPLimitOffset is in a fixed heap location (pointed to by BP).
   1952     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
   1953                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
   1954     BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_1)).addMBB(&prologueMBB);
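    // On x86-64 the check above corresponds roughly to (illustrative AT&T
    // syntax, not exact output):
    //   leaq -MaxStack(%rsp), %r14
    //   cmpq 0x90(%rbp), %r14
    //   jae  OldStart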
   1955 
   1956     // Create new MBB for IncStack:
   1957     BuildMI(incStackMBB, DL, TII.get(CALLop)).
   1958       addExternalSymbol("inc_stack_0");
   1959     addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
   1960                  SPReg, false, -MaxStack);
   1961     addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
   1962                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
   1963     BuildMI(incStackMBB, DL, TII.get(X86::JLE_1)).addMBB(incStackMBB);
   1964 
   1965     stackCheckMBB->addSuccessor(&prologueMBB, 99);
   1966     stackCheckMBB->addSuccessor(incStackMBB, 1);
   1967     incStackMBB->addSuccessor(&prologueMBB, 99);
   1968     incStackMBB->addSuccessor(incStackMBB, 1);
   1969   }
   1970 #ifdef XDEBUG
   1971   MF.verify();
   1972 #endif
   1973 }
   1974 
   1975 void X86FrameLowering::
   1976 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   1977                               MachineBasicBlock::iterator I) const {
   1978   const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
   1979   const TargetInstrInfo &TII = *STI.getInstrInfo();
   1980   const X86RegisterInfo &RegInfo = *STI.getRegisterInfo();
   1981   unsigned StackPtr = RegInfo.getStackRegister();
   1982   bool reserveCallFrame = hasReservedCallFrame(MF);
   1983   int Opcode = I->getOpcode();
   1984   bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
   1985   bool IsLP64 = STI.isTarget64BitLP64();
   1986   DebugLoc DL = I->getDebugLoc();
   1987   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
   1988   uint64_t InternalAmt = (isDestroy || Amount) ? I->getOperand(1).getImm() : 0;
   1989   I = MBB.erase(I);
   1990 
   1991   if (!reserveCallFrame) {
   1992     // If the stack pointer can be changed after the prologue, turn the
   1993     // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
   1994     // adjcallstackup instruction into an 'add ESP, <amt>'.
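    // E.g. (illustrative): a call needing 20 bytes of outgoing arguments on a
    // 16-byte-aligned stack becomes 'sub ESP, 32' before the call and
    // 'add ESP, 32' after it.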
   1995     if (Amount == 0)
   1996       return;
   1997 
   1998     // We need to keep the stack aligned properly.  To do this, we round the
   1999     // amount of space needed for the outgoing arguments up to the next
   2000     // alignment boundary.
   2001     unsigned StackAlign = getStackAlignment();
   2002     Amount = RoundUpToAlignment(Amount, StackAlign);
   2003 
   2004     MachineInstr *New = nullptr;
   2005 
   2006     // Factor out the amount that gets handled inside the sequence
   2007     // (pushes of arguments for frame setup, callee pops for frame destroy).
   2008     Amount -= InternalAmt;
   2009 
   2010     if (Amount) {
   2011       if (Opcode == TII.getCallFrameSetupOpcode()) {
   2012         New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)), StackPtr)
   2013           .addReg(StackPtr).addImm(Amount);
   2014       } else {
   2015         assert(Opcode == TII.getCallFrameDestroyOpcode());
   2016 
   2017         unsigned Opc = getADDriOpcode(IsLP64, Amount);
   2018         New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
   2019           .addReg(StackPtr).addImm(Amount);
   2020       }
   2021     }
   2022 
   2023     if (New) {
   2024       // The EFLAGS implicit def is dead.
   2025       New->getOperand(3).setIsDead();
   2026 
   2027       // Replace the pseudo instruction with a new instruction.
   2028       MBB.insert(I, New);
   2029     }
   2030 
   2031     return;
   2032   }
   2033 
   2034   if (Opcode == TII.getCallFrameDestroyOpcode() && InternalAmt) {
   2035     // If we are performing frame pointer elimination and if the callee pops
   2036     // something off the stack pointer, add it back.  We do this until we have
   2037     // more advanced stack pointer tracking ability.
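    // E.g. (illustrative): an stdcall callee that pops its own 8 bytes of
    // arguments leaves the stack pointer 8 bytes above what the reserved
    // frame expects, so we re-extend it with 'sub ESP, 8' after the CALL.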
   2038     unsigned Opc = getSUBriOpcode(IsLP64, InternalAmt);
   2039     MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
   2040       .addReg(StackPtr).addImm(InternalAmt);
   2041 
   2042     // The EFLAGS implicit def is dead.
   2043     New->getOperand(3).setIsDead();
   2044 
   2045     // We are not tracking the stack pointer adjustment by the callee, so make
   2046     // sure we restore the stack pointer immediately after the call; there may
   2047     // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
   2048     MachineBasicBlock::iterator B = MBB.begin();
   2049     while (I != B && !std::prev(I)->isCall())
   2050       --I;
   2051     MBB.insert(I, New);
   2052   }
   2053 }
   2054 
   2055