//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.  This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();
  const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();

  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          RegInfo->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() || MFI->hasInlineAsmWithSPAdjust() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit() || MMI.callsEHReturn());
}

static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
  if (IsLP64) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

static unsigned getLEArOpcode(unsigned IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
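
// For illustration: these helpers pick the narrowest encoding for the
// immediate, e.g. getSUBriOpcode(/*IsLP64=*/true, 16) returns X86::SUB64ri8
// (the immediate fits in 8 bits) while getSUBriOpcode(true, 4096) returns
// X86::SUB64ri32; getADDriOpcode makes the same choice for additions, and
// getLEArOpcode selects the pointer-width LEA used when EFLAGS must be
// preserved.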

/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const uint16_t CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const uint16_t CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RETL:
  case X86::RETQ:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<uint16_t, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, &TRI, true); AI.isValid(); ++AI)
        Uses.insert(*AI);
    }

    const uint16_t *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}
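
// For illustration: just before a plain 32-bit RETL whose operands reference
// none of EAX/EDX/ECX, the scan above finds no uses and returns X86::EAX,
// which emitSPUpdate below can then use as the target of a 4-byte POP.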


/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes,
                  bool Is64Bit, bool IsLP64, bool UseLEA,
                  const TargetInstrInfo &TII, const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc;
  if (UseLEA)
    Opc = getLEArOpcode(IsLP64);
  else
    Opc = isSub
      ? getSUBriOpcode(IsLP64, Offset)
      : getADDriOpcode(IsLP64, Offset);

  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    if (ThisVal == (Is64Bit ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
      if (Reg) {
        Opc = isSub
          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
          : (Is64Bit ? X86::POP64r  : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI = nullptr;

    if (UseLEA) {
      MI = addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                        StackPtr, false, isSub ? -ThisVal : ThisVal);
    } else {
      MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
            .addReg(StackPtr)
            .addImm(ThisVal);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    }

    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);

    Offset -= ThisVal;
  }
}
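
// For illustration: emitSPUpdate(..., NumBytes = -8, Is64Bit = true, ...)
// emits a single "pushq %rax" since 8 bytes matches the slot size, whereas
// NumBytes = -40 emits "subq $40, %rsp" (or an LEA when UseLEA is set, to
// avoid clobbering EFLAGS).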

/// mergeSPUpdatesUp - If the instruction immediately above MBBI is an
/// ADD/SUB/LEA on the stack pointer, fold its adjustment into *NumBytes and
/// erase it.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = nullptr) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = std::prev(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

/// mergeSPUpdatesDown - Like mergeSPUpdatesUp, but for the instruction
/// immediately below MBBI.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = nullptr) {
  // FIXME: This function is disabled; the early return makes everything
  // below unreachable.
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = std::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB/LEA instruction it is deleted and the
/// stack adjustment is returned as a positive value for ADD/LEA and a
/// negative one for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI, unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? nullptr
                                                       : std::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8 ||
       Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}
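
// For illustration: with "subl $16, %esp" immediately before MBBI and
// doMergeWithPrevious == true, the SUB is erased and -16 is returned, so the
// caller can fold those 16 bytes into its own stack adjustment.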

static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            DebugLoc DL) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  // Calculate offsets.
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();

    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex =
        MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg,
                                                        Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}
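
// For illustration: a callee-saved %rbx spilled to a slot at object offset
// -24 yields ".cfi_offset %rbx, -24", telling the unwinder where to find the
// saved value relative to the CFA.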

/// usesTheStack - This function checks whether any user of EFLAGS copies it.
/// We know that the code that lowers a COPY of EFLAGS has to use the stack,
/// and if we don't adjust the stack we clobber the first frame index.
/// See X86InstrInfo::copyPhysReg.
static bool usesTheStack(const MachineFunction &MF) {
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  for (MachineRegisterInfo::reg_instr_iterator
       ri = MRI.reg_instr_begin(X86::EFLAGS), re = MRI.reg_instr_end();
       ri != re; ++ri)
    if (ri->isCopy())
      return true;

  return false;
}
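
// For illustration: X86InstrInfo::copyPhysReg lowers a COPY involving EFLAGS
// with a PUSHF/POP sequence, which writes below the stack pointer; that is
// why such functions cannot use the Red Zone optimization below.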

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned.  This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/

void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
  bool Is64Bit = STI.is64Bit();
  bool IsLP64 = STI.isTarget64BitLP64();
  bool IsWin64 = STI.isTargetWin64();
  bool IsWinEH =
      MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() ==
      ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64.
  bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry();
  bool NeedsDwarfCFI =
      !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry());
  bool UseLEA = STI.useLeaForSP();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();
  unsigned BasePtr = RegInfo->getBaseRegister();
  DebugLoc DL;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info; we need to know the ABI stack alignment as well in case we have a
  // call out.  Otherwise just make sure we have some alignment - we'll go
  // with the minimum SlotSize.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  // If this is x86-64, the Red Zone is not disabled, and the function is a
  // leaf that uses at most 128 bytes of stack space and has no frame pointer,
  // calls, or dynamic allocas, then we do not need to adjust the stack
  // pointer (we fit in the Red Zone). We also check that we don't push and
  // pop from the stack.
  if (Is64Bit && !Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                                   Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                     // No dynamic alloca.
      !MFI->adjustsStack() &&                           // No calls.
      !IsWin64 &&                                       // Win64 has no Red Zone
      !usesTheStack(MF) &&                              // Don't push and pop.
      !MF.shouldSplitStack()) {                         // Regular stack
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(IsLP64, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta)
        .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF)) {
      // Callee-saved registers are pushed on stack before the stack
      // is realigned.
      FrameSize -= X86FI->getCalleeSavedFrameSize();
      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
    } else {
      NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();
    }

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
      CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createOffset(nullptr,
                                         DwarfFramePtr, 2 * stackGrowth));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // Update EBP with the new base value.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
        .addReg(StackPtr)
        .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI) {
      // Mark effective beginning of when frame pointer becomes valid.
      // Define the current CFA to use the EBP/RBP register.
      unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Mark the FramePtr as live-in in every block.
    for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
      I->addLiveIn(FramePtr);
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    unsigned Reg = MBBI->getOperand(0).getReg();
    ++MBBI;

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, StackOffset));
      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
      StackOffset += stackGrowth;
    }

    if (NeedsWinEH) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
          MachineInstr::FrameSetup);
    }
  }

  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  if (RegInfo->needsStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri), StackPtr)
      .addReg(StackPtr)
      .addImm(-MaxAlign)
      .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // If there is an ADD32ri or SUB32ri of ESP immediately after this
  // instruction, merge the two instructions.
  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

  // Adjust stack pointer: ESP -= numbytes.

  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
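  // For illustration: a 64-bit Windows function allocating 8192 bytes emits
  // roughly "movq $8192, %rax; callq __chkstk; subq %rax, %rsp", with the
  // probe call touching each page and the explicit SUB doing the actual
  // adjustment.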
  if (NumBytes >= 4096 && STI.isOSWindows() && !STI.isTargetMacho()) {
    const char *StackProbeSymbol;

    if (Is64Bit) {
      if (STI.isTargetCygMing()) {
        StackProbeSymbol = "___chkstk_ms";
      } else {
        StackProbeSymbol = "__chkstk";
      }
    } else if (STI.isTargetCygMing())
      StackProbeSymbol = "_alloca";
    else
      StackProbeSymbol = "_chkstk";

    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // EAX can only be live-in for 32-bit functions here; the x64 ABI does
      // not pass arguments in EAX/RAX.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // If EAX is alive, allocate only NumBytes-4 bytes on the stack; the 4
      // bytes already pushed to save EAX complete the allocation.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
      .addExternalSymbol(StackProbeSymbol)
      .addReg(StackPtr,    RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit)
      .setMIFlag(MachineInstr::FrameSetup);

    if (Is64Bit) {
      // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
      // themselves. They also do not clobber %rax, so we can reuse it when
      // adjusting %rsp.
      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64rr), StackPtr)
        .addReg(StackPtr)
        .addReg(X86::RAX)
        .setMIFlag(MachineInstr::FrameSetup);
    }
    if (isEAXAlive) {
      // Restore EAX
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64,
                 UseLEA, TII, *RegInfo);
  }

  int SEHFrameOffset = 0;
  if (NeedsWinEH) {
    if (HasFP) {
      // We need to set the frame base offset low enough that all saved
      // register offsets are positive relative to it, but we can't just use
      // NumBytes, because the .seh_setframe offset must be <= 240.  So we
      // pretend to have allocated only enough space to spill the
      // non-volatile registers.  We don't care about the rest of the stack
      // allocation, because the unwinder will restore SP to
      // (BP - SEHFrameOffset).
      for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
        int offset = MFI->getObjectOffset(Info.getFrameIdx());
        SEHFrameOffset = std::max(SEHFrameOffset, abs(offset));
      }
      SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment

      // This only needs to account for XMM spill slots; GPR slots
      // are covered by the .seh_pushreg's emitted above.
      unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize();
      if (Size) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
            .addImm(Size)
            .setMIFlag(MachineInstr::FrameSetup);
      }

      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // SP will be the base register for restoring XMMs
      if (NumBytes) {
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
            .addImm(NumBytes)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  }

  // Skip the rest of the register spilling code.
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  // Emit SEH info for non-GPRs.
  if (NeedsWinEH) {
    for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) {
      unsigned Reg = Info.getReg();
      if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
        continue;
      assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class");

      int Offset = getFrameIndexOffset(MF, Info.getFrameIdx());
      Offset += SEHFrameOffset;

      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
          .addImm(Reg)
          .addImm(Offset)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (RegInfo->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      assert(StackSize);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr,
                                               -StackSize + stackGrowth));

      BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex);
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MBB, MBBI, DL);
  }
}

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
  bool Is64Bit = STI.is64Bit();
  bool IsLP64 = STI.isTarget64BitLP64();
  bool UseLEA = STI.useLeaForSP();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RETQ:
  case X86::RETL:
  case X86::RETIL:
  case X86::RETIQ:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok.
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign  = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info; we need to know the ABI stack alignment as well in case we have a
  // call out.  Otherwise just make sure we have some alignment - we'll go
  // with the minimum.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else
      MaxAlign = MaxAlign ? MaxAlign : 4;
  }

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF)) {
      // Callee-saved registers were pushed on stack before the stack
      // was realigned.
      FrameSize -= CSSize;
      NumBytes = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;
    } else {
      NumBytes = FrameSize - CSSize;
    }

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->isTerminator())
      break;

    --MBBI;
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset esp to point to the last
  // callee-saved slot before popping them off!  The same applies when the
  // stack was realigned.
  if (RegInfo->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) {
    if (RegInfo->needsStackRealignment(MF))
      MBBI = FirstCSPop;
    if (CSSize != 0) {
      unsigned Opc = getLEArOpcode(IsLP64);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, -CSSize);
    } else {
      unsigned Opc = (Is64Bit ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, IsLP64, UseLEA,
                 TII, *RegInfo);
  }

  // We're returning from the function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr  = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, IsLP64,
                   UseLEA, TII, *RegInfo);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                       ? X86::TAILJMPd : X86::TAILJMPd64));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = std::prev(MBBI);
    NewMI->copyImplicitOps(MF, MBBI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RETQ || RetOpcode == X86::RETL ||
              RetOpcode == X86::RETIQ || RetOpcode == X86::RETIL) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1 * X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, IsLP64, UseLEA, TII,
                 *RegInfo);
  }
}

int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                          int FI) const {
  const X86RegisterInfo *RegInfo =
    static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (RegInfo->hasBasePointer(MF)) {
    assert(hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + RegInfo->getSlotSize();
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
  } else if (RegInfo->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return Offset + RegInfo->getSlotSize();
    } else {
      assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += RegInfo->getSlotSize();

    // Skip the RETADDR move area
    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}
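
// For illustration (frame pointer present, no realignment or base pointer):
// a frame object resolves to
// getObjectOffset(FI) - getOffsetOfLocalArea() + SlotSize, i.e. the raw
// object offset adjusted past the saved frame pointer slot, plus the RETADDR
// move area for tail-call functions.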

int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer.  The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else if (RegInfo->needsStackRealignment(MF))
    FrameReg = RegInfo->getStackRegister();
  else
    FrameReg = RegInfo->getFrameRegister(MF);
  return getFrameIndexOffset(MF, FI);
}

bool X86FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
  unsigned SlotSize = RegInfo->getSlotSize();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned CalleeSavedFrameSize = 0;
  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();

  if (hasFP(MF)) {
    // emitPrologue always spills the frame register first.
    SpillSlotOffset -= SlotSize;
    MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);

    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
    // the frame register, we can delete it from the CSI list and not have to
    // worry about avoiding it later.
    unsigned FPReg = RegInfo->getFrameRegister(MF);
    for (unsigned i = 0; i < CSI.size(); ++i) {
      if (CSI[i].getReg() == FPReg) {
        CSI.erase(CSI.begin() + i);
        break;
      }
    }
  }

  // Assign slots for GPRs. It increases frame size.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
  }

  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);

  // Assign slots for XMMs.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg);
    // Ensure alignment.
    SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
    // Spill into slot.
    SpillSlotOffset -= RC->getSize();
    int SlotIndex =
        MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
    MFI->ensureMaxAlignment(RC->getAlignment());
  }

  return true;
}
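
// For illustration: on x86-64 with a frame pointer and CSI = {RBX, R12, XMM6},
// the saved RBP slot is reserved first, the two GPR slots follow 8 bytes
// apart below it, and the XMM slot is placed below those, aligned to the
// register class's 16-byte alignment.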

bool X86FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();

  // Push GPRs. It increases frame size.
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);

    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  // Spill XMM regs. X86 has no push/pop instructions for XMM registers, so
  // spill them to the stack frame instead.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                            TRI);
    --MI;
    MI->setFlag(MachineInstr::FrameSetup);
    ++MI;
  }

  return true;
}

bool X86FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
  }

  // POP GPRs.
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;

    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
  }
  return true;
}

void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo =
      static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
  unsigned SlotSize = RegInfo->getSlotSize();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // Create the RETURNADDR area:
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           TailCallReturnAddrDelta - SlotSize, true);
  }

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
}

static bool
HasNestArgument(const MachineFunction *MF) {
  const Function *F = MF->getFunction();
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    if (I->hasNestAttr())
      return true;
  }
  return false;
}
   1275 
   1276 /// GetScratchRegister - Get a temp register for performing work in the
   1277 /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
   1278 /// and the properties of the function, either one or two registers will be
   1279 /// needed. Set primary to true for the first register, false for the second.
   1280 static unsigned
   1281 GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
   1282   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
   1283 
   1284   // Erlang stuff.
   1285   if (CallingConvention == CallingConv::HiPE) {
   1286     if (Is64Bit)
   1287       return Primary ? X86::R14 : X86::R13;
   1288     else
   1289       return Primary ? X86::EBX : X86::EDI;
   1290   }
   1291 
   1292   if (Is64Bit)
   1293     return Primary ? X86::R11 : X86::R12;
   1294 
   1295   bool IsNested = HasNestArgument(&MF);
   1296 
   1297   if (CallingConvention == CallingConv::X86_FastCall ||
   1298       CallingConvention == CallingConv::Fast) {
   1299     if (IsNested)
   1300       report_fatal_error("Segmented stacks do not support fastcall with "
   1301                          "nested functions.");
   1302     return Primary ? X86::EAX : X86::ECX;
   1303   }
   1304   if (IsNested)
   1305     return Primary ? X86::EDX : X86::EAX;
   1306   return Primary ? X86::ECX : X86::EAX;
   1307 }
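        // For example, a plain 32-bit C-calling-convention function with no nest
        // argument gets ECX as the primary scratch register and EAX as the
        // secondary one.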
   1308 
   1309 // The stack limit in the TCB is set to this many bytes above the actual stack
   1310 // limit.
   1311 static const uint64_t kSplitStackAvailable = 256;
   1312 
   1313 void
   1314 X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
   1315   MachineBasicBlock &prologueMBB = MF.front();
   1316   MachineFrameInfo *MFI = MF.getFrameInfo();
   1317   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   1318   uint64_t StackSize;
   1319   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
   1320   bool Is64Bit = STI.is64Bit();
   1321   unsigned TlsReg, TlsOffset;
   1322   DebugLoc DL;
   1323 
   1324   unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
   1325   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
   1326          "Scratch register is live-in");
   1327 
   1328   if (MF.getFunction()->isVarArg())
   1329     report_fatal_error("Segmented stacks do not support vararg functions.");
   1330   if (!STI.isTargetLinux() && !STI.isTargetDarwin() &&
   1331       !STI.isTargetWin32() && !STI.isTargetWin64() && !STI.isTargetFreeBSD())
   1332     report_fatal_error("Segmented stacks not supported on this platform.");
   1333 
   1334   // Eventually StackSize will be calculated by a link-time pass, which will
   1335   // also decide whether checking code needs to be injected into this particular
   1336   // prologue.
   1337   StackSize = MFI->getStackSize();
   1338 
   1339   // Do not generate a prologue for functions with a stack frame of size zero.
   1340   if (StackSize == 0)
   1341     return;
   1342 
   1343   MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
   1344   MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
   1345   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   1346   bool IsNested = false;
   1347 
   1348   // Whether the function has a nest argument only matters in 64-bit mode.
   1349   if (Is64Bit)
   1350     IsNested = HasNestArgument(&MF);
   1351 
   1352   // The MOV R10, RAX needs to be in a different block, since the RET we emit in
   1353   // allocMBB needs to be the last (terminating) instruction.
   1354 
   1355   for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
   1356          e = prologueMBB.livein_end(); i != e; i++) {
   1357     allocMBB->addLiveIn(*i);
   1358     checkMBB->addLiveIn(*i);
   1359   }
   1360 
   1361   if (IsNested)
   1362     allocMBB->addLiveIn(X86::R10);
   1363 
   1364   MF.push_front(allocMBB);
   1365   MF.push_front(checkMBB);
   1366 
   1367   // When the frame size is less than 256 we just compare the stack
   1368   // boundary directly to the value of the stack pointer, per gcc.
   1369   bool CompareStackPointer = StackSize < kSplitStackAvailable;
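          // For example, a 100-byte frame (below kSplitStackAvailable) compares
          // the stack pointer itself against the limit instead of first
          // computing SP - 100 into the scratch register.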
   1370 
   1371   // Read the limit of the current stacklet from the stack_guard location.
   1372   if (Is64Bit) {
   1373     if (STI.isTargetLinux()) {
   1374       TlsReg = X86::FS;
   1375       TlsOffset = 0x70;
   1376     } else if (STI.isTargetDarwin()) {
   1377       TlsReg = X86::GS;
   1378       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
   1379     } else if (STI.isTargetWin64()) {
   1380       TlsReg = X86::GS;
   1381       TlsOffset = 0x28; // pvArbitrary, reserved for application use
   1382     } else if (STI.isTargetFreeBSD()) {
   1383       TlsReg = X86::FS;
   1384       TlsOffset = 0x18;
   1385     } else {
   1386       report_fatal_error("Segmented stacks not supported on this platform.");
   1387     }
   1388 
   1389     if (CompareStackPointer)
   1390       ScratchReg = X86::RSP;
   1391     else
   1392       BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
   1393         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
   1394 
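            // On Linux, for instance, the check below is roughly
            //   cmp ScratchReg, qword ptr fs:[0x70]   (Intel syntax)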
   1395     BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
   1396       .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
   1397   } else {
   1398     if (STI.isTargetLinux()) {
   1399       TlsReg = X86::GS;
   1400       TlsOffset = 0x30;
   1401     } else if (STI.isTargetDarwin()) {
   1402       TlsReg = X86::GS;
   1403       TlsOffset = 0x48 + 90*4;
   1404     } else if (STI.isTargetWin32()) {
   1405       TlsReg = X86::FS;
   1406       TlsOffset = 0x14; // pvArbitrary, reserved for application use
   1407     } else if (STI.isTargetFreeBSD()) {
   1408       report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
   1409     } else {
   1410       report_fatal_error("Segmented stacks not supported on this platform.");
   1411     }
   1412 
   1413     if (CompareStackPointer)
   1414       ScratchReg = X86::ESP;
   1415     else
   1416       BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
   1417         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
   1418 
   1419     if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64()) {
   1420       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
   1421         .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
   1422     } else if (STI.isTargetDarwin()) {
   1423 
   1424       // TlsOffset doesn't fit into a mod r/m byte, so we need an extra register.
   1425       unsigned ScratchReg2;
   1426       bool SaveScratch2;
   1427       if (CompareStackPointer) {
   1428         // The primary scratch register is available for holding the TLS offset.
   1429         ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
   1430         SaveScratch2 = false;
   1431       } else {
   1432         // Need to use a second register to hold the TLS offset
   1433         ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
   1434 
   1435         // Unfortunately, with fastcc the second scratch register may hold an
   1436         // argument.
   1437         SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
   1438       }
   1439 
   1440       // If Scratch2 is live-in then it needs to be saved.
   1441       assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
   1442              "Scratch register is live-in and not saved");
   1443 
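              // The emitted sequence is roughly:
              //   push ScratchReg2                        ; only if live-in
              //   mov  ScratchReg2, TlsOffset
              //   cmp  ScratchReg, dword ptr gs:[ScratchReg2]
              //   pop  ScratchReg2                        ; only if pushed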
   1444       if (SaveScratch2)
   1445         BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
   1446           .addReg(ScratchReg2, RegState::Kill);
   1447 
   1448       BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
   1449         .addImm(TlsOffset);
   1450       BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
   1451         .addReg(ScratchReg)
   1452         .addReg(ScratchReg2).addImm(1).addReg(0)
   1453         .addImm(0)
   1454         .addReg(TlsReg);
   1455 
   1456       if (SaveScratch2)
   1457         BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
   1458     }
   1459   }
   1460 
   1461   // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
   1462   // It jumps to normal execution of the function body.
   1463   BuildMI(checkMBB, DL, TII.get(X86::JA_4)).addMBB(&prologueMBB);
   1464 
   1465   // On 32-bit we first push the argument size and then the frame size. On
   1466   // 64-bit, we pass the stack frame size in r10 and the argument size in r11.
   1467   if (Is64Bit) {
   1468     // Functions with a nest argument use R10, so it needs to be saved across
   1469     // the call to __morestack.
   1470 
   1471     if (IsNested)
   1472       BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
   1473 
   1474     BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
   1475       .addImm(StackSize);
   1476     BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
   1477       .addImm(X86FI->getArgumentStackSize());
   1478     MF.getRegInfo().setPhysRegUsed(X86::R10);
   1479     MF.getRegInfo().setPhysRegUsed(X86::R11);
   1480   } else {
   1481     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
   1482       .addImm(X86FI->getArgumentStackSize());
   1483     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
   1484       .addImm(StackSize);
   1485   }
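          // For example, a 32-bit function with a 4096-byte frame and 12 bytes
          // of stack arguments emits "push 12" followed by "push 4096" ahead of
          // the __morestack call.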
   1486 
   1487   // __morestack is in libgcc
   1488   if (Is64Bit)
   1489     BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
   1490       .addExternalSymbol("__morestack");
   1491   else
   1492     BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
   1493       .addExternalSymbol("__morestack");
   1494 
   1495   if (IsNested)
   1496     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
   1497   else
   1498     BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));
   1499 
   1500   allocMBB->addSuccessor(&prologueMBB);
   1501 
   1502   checkMBB->addSuccessor(allocMBB);
   1503   checkMBB->addSuccessor(&prologueMBB);
   1504 
   1505 #ifdef XDEBUG
   1506   MF.verify();
   1507 #endif
   1508 }
   1509 
   1510 /// Erlang programs may need a special prologue to handle the stack size they
   1511 /// might need at runtime. That is because Erlang/OTP does not implement a C
   1512 /// stack but uses a custom implementation of a hybrid stack/heap architecture.
   1513 /// (For more information, see Eric Stenman's Ph.D. thesis:
   1514 /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
   1515 ///
   1516 /// CheckStack:
   1517 ///       temp0 = sp - MaxStack
   1518 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
   1519 /// OldStart:
   1520 ///       ...
   1521 /// IncStack:
   1522 ///       call inc_stack   # doubles the stack space
   1523 ///       temp0 = sp - MaxStack
   1524 ///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
   1525 void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
   1526   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   1527   MachineFrameInfo *MFI = MF.getFrameInfo();
   1528   const unsigned SlotSize =
   1529       static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo())
   1530           ->getSlotSize();
   1531   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
   1532   const bool Is64Bit = STI.is64Bit();
   1533   DebugLoc DL;
   1534   // HiPE-specific values
   1535   const unsigned HipeLeafWords = 24;
   1536   const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
   1537   const unsigned Guaranteed = HipeLeafWords * SlotSize;
   1538   unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
   1539                             MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
   1540   unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
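          // For example, on x86-64 (SlotSize == 8, CCRegisteredArgs == 6) a
          // function with a 40-byte frame and 8 arguments starts with
          // MaxStack = 40 + 2*8 + 8 = 64.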
   1541 
   1542   assert(STI.isTargetLinux() &&
   1543          "HiPE prologue is only supported on Linux operating systems.");
   1544 
   1545   // Compute the largest caller's frame that is needed to fit the callees'
   1546   // frames. This 'MaxStack' is computed from:
   1547   //
   1548   // a) the fixed frame size, which is the space needed for all spilled temps,
   1549   // b) outgoing on-stack parameter areas, and
   1550   // c) the minimum stack space this function needs to make available for the
   1551   //    functions it calls (a tunable ABI property).
   1552   if (MFI->hasCalls()) {
   1553     unsigned MoreStackForCalls = 0;
   1554 
   1555     for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
   1556          MBBI != MBBE; ++MBBI)
   1557       for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
   1558            MI != ME; ++MI) {
   1559         if (!MI->isCall())
   1560           continue;
   1561 
   1562         // Get callee operand.
   1563         const MachineOperand &MO = MI->getOperand(0);
   1564 
   1565         // Only take account of global function calls (no closures etc.).
   1566         if (!MO.isGlobal())
   1567           continue;
   1568 
   1569         const Function *F = dyn_cast<Function>(MO.getGlobal());
   1570         if (!F)
   1571           continue;
   1572 
   1573         // Do not update 'MaxStack' for primitive and built-in functions
   1574         // (encoded with names either starting with "erlang." or "bif_", or
   1575         // containing neither a "." (as in an ordinary
   1576         // <Module>.<Function>.<Arity> name) nor an "_" (as in the BIF
   1577         // "suspend_0")), as they are executed on another stack.
   1578         if (F->getName().find("erlang.") != StringRef::npos ||
   1579             F->getName().find("bif_") != StringRef::npos ||
   1580             F->getName().find_first_of("._") == StringRef::npos)
   1581           continue;
   1582 
   1583         unsigned CalleeStkArity =
   1584           F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
   1585         if (HipeLeafWords - 1 > CalleeStkArity)
   1586           MoreStackForCalls = std::max(MoreStackForCalls,
   1587                                (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
   1588       }
   1589     MaxStack += MoreStackForCalls;
   1590   }
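          // For example, a call to a 7-argument function on x86-64 has
          // CalleeStkArity == 1, yielding (24 - 1 - 1) * 8 == 176 bytes as its
          // MoreStackForCalls candidate.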
   1591 
   1592   // If the stack frame needed is larger than the guaranteed amount, runtime
   1593   // checks and calls to the "inc_stack_0" BIF are inserted into the prologue.
   1594   if (MaxStack > Guaranteed) {
   1595     MachineBasicBlock &prologueMBB = MF.front();
   1596     MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
   1597     MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
   1598 
   1599     for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
   1600            E = prologueMBB.livein_end(); I != E; I++) {
   1601       stackCheckMBB->addLiveIn(*I);
   1602       incStackMBB->addLiveIn(*I);
   1603     }
   1604 
   1605     MF.push_front(incStackMBB);
   1606     MF.push_front(stackCheckMBB);
   1607 
   1608     unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
   1609     unsigned LEAop, CMPop, CALLop;
   1610     if (Is64Bit) {
   1611       SPReg = X86::RSP;
   1612       PReg  = X86::RBP;
   1613       LEAop = X86::LEA64r;
   1614       CMPop = X86::CMP64rm;
   1615       CALLop = X86::CALL64pcrel32;
   1616       SPLimitOffset = 0x90;
   1617     } else {
   1618       SPReg = X86::ESP;
   1619       PReg  = X86::EBP;
   1620       LEAop = X86::LEA32r;
   1621       CMPop = X86::CMP32rm;
   1622       CALLop = X86::CALLpcrel32;
   1623       SPLimitOffset = 0x4c;
   1624     }
   1625 
   1626     ScratchReg = GetScratchRegister(Is64Bit, MF, true);
   1627     assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
   1628            "HiPE prologue scratch register is live-in");
   1629 
   1630     // Create new MBB for StackCheck:
   1631     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
   1632                  SPReg, false, -MaxStack);
   1633     // SPLimitOffset is in a fixed heap location (pointed to by BP).
   1634     addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
   1635                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
   1636     BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
   1637 
   1638     // Create new MBB for IncStack:
   1639     BuildMI(incStackMBB, DL, TII.get(CALLop))
   1640       .addExternalSymbol("inc_stack_0");
   1641     addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
   1642                  SPReg, false, -MaxStack);
   1643     addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
   1644                  .addReg(ScratchReg), PReg, false, SPLimitOffset);
   1645     BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
   1646 
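            // Weight the successors 99:1 so that growing the stack is treated
            // as the unlikely path.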
   1647     stackCheckMBB->addSuccessor(&prologueMBB, 99);
   1648     stackCheckMBB->addSuccessor(incStackMBB, 1);
   1649     incStackMBB->addSuccessor(&prologueMBB, 99);
   1650     incStackMBB->addSuccessor(incStackMBB, 1);
   1651   }
   1652 #ifdef XDEBUG
   1653   MF.verify();
   1654 #endif
   1655 }
   1656 
   1657 void X86FrameLowering::
   1658 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
   1659                               MachineBasicBlock::iterator I) const {
   1660   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
   1661   const X86RegisterInfo &RegInfo =
   1662       *static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo());
   1663   unsigned StackPtr = RegInfo.getStackRegister();
   1664   bool reserveCallFrame = hasReservedCallFrame(MF);
   1665   int Opcode = I->getOpcode();
   1666   bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
   1667   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
   1668   bool IsLP64 = STI.isTarget64BitLP64();
   1669   DebugLoc DL = I->getDebugLoc();
   1670   uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
   1671   uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
   1672   I = MBB.erase(I);
   1673 
   1674   if (!reserveCallFrame) {
   1675     // If the stack pointer can be changed after prologue, turn the
   1676     // adjcallstackdown instruction into a 'sub ESP, <amt>' and the
   1677     // adjcallstackup instruction into an 'add ESP, <amt>'.
   1678     // TODO: consider using push / pop instead of sub + store / add
   1679     if (Amount == 0)
   1680       return;
   1681 
   1682     // We need to keep the stack aligned properly.  To do this, we round the
   1683     // amount of space needed for the outgoing arguments up to the next
   1684     // alignment boundary.
   1685     unsigned StackAlign =
   1686         MF.getTarget().getFrameLowering()->getStackAlignment();
   1687     Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
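            // For example, with StackAlign == 16 an Amount of 20 rounds up to
            // (20 + 15) / 16 * 16 == 32.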
   1688 
   1689     MachineInstr *New = nullptr;
   1690     if (Opcode == TII.getCallFrameSetupOpcode()) {
   1691       New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
   1692                     StackPtr)
   1693         .addReg(StackPtr)
   1694         .addImm(Amount);
   1695     } else {
   1696       assert(Opcode == TII.getCallFrameDestroyOpcode());
   1697 
   1698       // Factor out the amount the callee already popped.
   1699       Amount -= CalleeAmt;
   1700 
   1701       if (Amount) {
   1702         unsigned Opc = getADDriOpcode(IsLP64, Amount);
   1703         New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
   1704           .addReg(StackPtr).addImm(Amount);
   1705       }
   1706     }
   1707 
   1708     if (New) {
   1709       // The EFLAGS implicit def is dead.
   1710       New->getOperand(3).setIsDead();
   1711 
   1712       // Replace the pseudo instruction with a new instruction.
   1713       MBB.insert(I, New);
   1714     }
   1715 
   1716     return;
   1717   }
   1718 
   1719   if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
   1720     // If we are performing frame pointer elimination and if the callee pops
   1721     // something off the stack pointer, add it back.  We do this until we have
   1722     // more advanced stack pointer tracking ability.
   1723     unsigned Opc = getSUBriOpcode(IsLP64, CalleeAmt);
   1724     MachineInstr *New = BuildMI(MF, DL, TII.get(Opc), StackPtr)
   1725       .addReg(StackPtr).addImm(CalleeAmt);
   1726 
   1727     // The EFLAGS implicit def is dead.
   1728     New->getOperand(3).setIsDead();
   1729 
   1730     // We are not tracking the stack pointer adjustment by the callee, so make
   1731     // sure we restore the stack pointer immediately after the call; there may
   1732     // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
   1733     MachineBasicBlock::iterator B = MBB.begin();
   1734     while (I != B && !std::prev(I)->isCall())
   1735       --I;
   1736     MBB.insert(I, New);
   1737   }
   1738 }
   1739 
   1740