Home | History | Annotate | Download | only in AArch64
      1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the AArch64 implementation of TargetFrameLowering class.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "AArch64FrameLowering.h"
     15 #include "AArch64InstrInfo.h"
     16 #include "AArch64MachineFunctionInfo.h"
     17 #include "AArch64Subtarget.h"
     18 #include "AArch64TargetMachine.h"
     19 #include "llvm/ADT/Statistic.h"
     20 #include "llvm/IR/DataLayout.h"
     21 #include "llvm/IR/Function.h"
     22 #include "llvm/CodeGen/MachineFrameInfo.h"
     23 #include "llvm/CodeGen/MachineFunction.h"
     24 #include "llvm/CodeGen/MachineInstrBuilder.h"
     25 #include "llvm/CodeGen/MachineModuleInfo.h"
     26 #include "llvm/CodeGen/MachineRegisterInfo.h"
     27 #include "llvm/CodeGen/RegisterScavenging.h"
     28 #include "llvm/Support/Debug.h"
     29 #include "llvm/Support/CommandLine.h"
     30 #include "llvm/Support/raw_ostream.h"
     31 
     32 using namespace llvm;
     33 
     34 #define DEBUG_TYPE "frame-info"
     35 
     36 static cl::opt<bool> EnableRedZone("aarch64-redzone",
     37                                    cl::desc("enable use of redzone on AArch64"),
     38                                    cl::init(false), cl::Hidden);
     39 
     40 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
     41 
     42 static unsigned estimateStackSize(MachineFunction &MF) {
     43   const MachineFrameInfo *FFI = MF.getFrameInfo();
     44   int Offset = 0;
     45   for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
     46     int FixedOff = -FFI->getObjectOffset(i);
     47     if (FixedOff > Offset)
     48       Offset = FixedOff;
     49   }
     50   for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
     51     if (FFI->isDeadObjectIndex(i))
     52       continue;
     53     Offset += FFI->getObjectSize(i);
     54     unsigned Align = FFI->getObjectAlignment(i);
     55     // Adjust to alignment boundary
     56     Offset = (Offset + Align - 1) / Align * Align;
     57   }
     58   // This does not include the 16 bytes used for fp and lr.
     59   return (unsigned)Offset;
     60 }
     61 
     62 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
     63   if (!EnableRedZone)
     64     return false;
     65   // Don't use the red zone if the function explicitly asks us not to.
     66   // This is typically used for kernel code.
     67   if (MF.getFunction()->getAttributes().hasAttribute(
     68           AttributeSet::FunctionIndex, Attribute::NoRedZone))
     69     return false;
     70 
     71   const MachineFrameInfo *MFI = MF.getFrameInfo();
     72   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
     73   unsigned NumBytes = AFI->getLocalStackSize();
     74 
     75   // Note: currently hasFP() is always true for hasCalls(), but that's an
     76   // implementation detail of the current code, not a strict requirement,
     77   // so stay safe here and check both.
     78   if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
     79     return false;
     80   return true;
     81 }
     82 
     83 /// hasFP - Return true if the specified function should have a dedicated frame
     84 /// pointer register.
     85 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
     86   const MachineFrameInfo *MFI = MF.getFrameInfo();
     87 
     88 #ifndef NDEBUG
     89   const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
     90   assert(!RegInfo->needsStackRealignment(MF) &&
     91          "No stack realignment on AArch64!");
     92 #endif
     93 
     94   return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
     95           MFI->isFrameAddressTaken());
     96 }
     97 
     98 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
     99 /// not required, we reserve argument space for call sites in the function
    100 /// immediately on entry to the current function.  This eliminates the need for
    101 /// add/sub sp brackets around call sites.  Returns true if the call frame is
    102 /// included as part of the stack frame.
    103 bool
    104 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
    105   return !MF.getFrameInfo()->hasVarSizedObjects();
    106 }
    107 
    108 void AArch64FrameLowering::eliminateCallFramePseudoInstr(
    109     MachineFunction &MF, MachineBasicBlock &MBB,
    110     MachineBasicBlock::iterator I) const {
    111   const AArch64InstrInfo *TII =
    112       static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
    113   DebugLoc DL = I->getDebugLoc();
    114   int Opc = I->getOpcode();
    115   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
    116   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
    117 
    118   const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
    119   if (!TFI->hasReservedCallFrame(MF)) {
    120     unsigned Align = getStackAlignment();
    121 
    122     int64_t Amount = I->getOperand(0).getImm();
    123     Amount = RoundUpToAlignment(Amount, Align);
    124     if (!IsDestroy)
    125       Amount = -Amount;
    126 
    127     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    128     // doesn't have to pop anything), then the first operand will be zero too so
    129     // this adjustment is a no-op.
    130     if (CalleePopAmount == 0) {
    131       // FIXME: in-function stack adjustment for calls is limited to 24-bits
    132       // because there's no guaranteed temporary register available.
    133       //
    134       // ADD/SUB (immediate) has only LSL #0 and LSL #12 avaiable.
    135       // 1) For offset <= 12-bit, we use LSL #0
    136       // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
    137       // LSL #0, and the other uses LSL #12.
    138       //
    139       // Mostly call frames will be allocated at the start of a function so
    140       // this is OK, but it is a limitation that needs dealing with.
    141       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
    142       emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    143     }
    144   } else if (CalleePopAmount != 0) {
    145     // If the calling convention demands that the callee pops arguments from the
    146     // stack, we want to add it back if we have a reserved call frame.
    147     assert(CalleePopAmount < 0xffffff && "call frame too large");
    148     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
    149                     TII);
    150   }
    151   MBB.erase(I);
    152 }
    153 
    154 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    155     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    156     unsigned FramePtr) const {
    157   MachineFunction &MF = *MBB.getParent();
    158   MachineFrameInfo *MFI = MF.getFrameInfo();
    159   MachineModuleInfo &MMI = MF.getMMI();
    160   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
    161   const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
    162   DebugLoc DL = MBB.findDebugLoc(MBBI);
    163 
    164   // Add callee saved registers to move list.
    165   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    166   if (CSI.empty())
    167     return;
    168 
    169   const DataLayout *TD = MF.getTarget().getDataLayout();
    170   bool HasFP = hasFP(MF);
    171 
    172   // Calculate amount of bytes used for return address storing.
    173   int stackGrowth = -TD->getPointerSize(0);
    174 
    175   // Calculate offsets.
    176   int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
    177   unsigned TotalSkipped = 0;
    178   for (const auto &Info : CSI) {
    179     unsigned Reg = Info.getReg();
    180     int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
    181                      getOffsetOfLocalArea() + saveAreaOffset;
    182 
    183     // Don't output a new CFI directive if we're re-saving the frame pointer or
    184     // link register. This happens when the PrologEpilogInserter has inserted an
    185     // extra "STP" of the frame pointer and link register -- the "emitPrologue"
    186     // method automatically generates the directives when frame pointers are
    187     // used. If we generate CFI directives for the extra "STP"s, the linker will
    188     // lose track of the correct values for the frame pointer and link register.
    189     if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) {
    190       TotalSkipped += stackGrowth;
    191       continue;
    192     }
    193 
    194     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    195     unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
    196         nullptr, DwarfReg, Offset - TotalSkipped));
    197     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    198         .addCFIIndex(CFIIndex);
    199   }
    200 }
    201 
    202 void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
    203   MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
    204   MachineBasicBlock::iterator MBBI = MBB.begin();
    205   const MachineFrameInfo *MFI = MF.getFrameInfo();
    206   const Function *Fn = MF.getFunction();
    207   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    208       MF.getTarget().getRegisterInfo());
    209   const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
    210   MachineModuleInfo &MMI = MF.getMMI();
    211   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    212   bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
    213   bool HasFP = hasFP(MF);
    214   DebugLoc DL = MBB.findDebugLoc(MBBI);
    215 
    216   int NumBytes = (int)MFI->getStackSize();
    217   if (!AFI->hasStackFrame()) {
    218     assert(!HasFP && "unexpected function without stack frame but with FP");
    219 
    220     // All of the stack allocation is for locals.
    221     AFI->setLocalStackSize(NumBytes);
    222 
    223     // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    224     MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
    225 
    226     // REDZONE: If the stack size is less than 128 bytes, we don't need
    227     // to actually allocate.
    228     if (NumBytes && !canUseRedZone(MF)) {
    229       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
    230                       MachineInstr::FrameSetup);
    231 
    232       // Encode the stack size of the leaf function.
    233       unsigned CFIIndex = MMI.addFrameInst(
    234           MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
    235       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    236           .addCFIIndex(CFIIndex);
    237     } else if (NumBytes) {
    238       ++NumRedZoneFunctions;
    239     }
    240 
    241     return;
    242   }
    243 
    244   // Only set up FP if we actually need to.
    245   int FPOffset = 0;
    246   if (HasFP) {
    247     // First instruction must a) allocate the stack  and b) have an immediate
    248     // that is a multiple of -2.
    249     assert((MBBI->getOpcode() == AArch64::STPXpre ||
    250             MBBI->getOpcode() == AArch64::STPDpre) &&
    251            MBBI->getOperand(3).getReg() == AArch64::SP &&
    252            MBBI->getOperand(4).getImm() < 0 &&
    253            (MBBI->getOperand(4).getImm() & 1) == 0);
    254 
    255     // Frame pointer is fp = sp - 16. Since the  STPXpre subtracts the space
    256     // required for the callee saved register area we get the frame pointer
    257     // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    258     FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    259     assert(FPOffset >= 0 && "Bad Framepointer Offset");
    260   }
    261 
    262   // Move past the saves of the callee-saved registers.
    263   while (MBBI->getOpcode() == AArch64::STPXi ||
    264          MBBI->getOpcode() == AArch64::STPDi ||
    265          MBBI->getOpcode() == AArch64::STPXpre ||
    266          MBBI->getOpcode() == AArch64::STPDpre) {
    267     ++MBBI;
    268     NumBytes -= 16;
    269   }
    270   assert(NumBytes >= 0 && "Negative stack allocation size!?");
    271   if (HasFP) {
    272     // Issue    sub fp, sp, FPOffset or
    273     //          mov fp,sp          when FPOffset is zero.
    274     // Note: All stores of callee-saved registers are marked as "FrameSetup".
    275     // This code marks the instruction(s) that set the FP also.
    276     emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
    277                     MachineInstr::FrameSetup);
    278   }
    279 
    280   // All of the remaining stack allocations are for locals.
    281   AFI->setLocalStackSize(NumBytes);
    282 
    283   // Allocate space for the rest of the frame.
    284   if (NumBytes) {
    285     // If we're a leaf function, try using the red zone.
    286     if (!canUseRedZone(MF))
    287       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
    288                       MachineInstr::FrameSetup);
    289   }
    290 
    291   // If we need a base pointer, set it up here. It's whatever the value of the
    292   // stack pointer is at this point. Any variable size objects will be allocated
    293   // after this, so we can still use the base pointer to reference locals.
    294   //
    295   // FIXME: Clarify FrameSetup flags here.
    296   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
    297   // needed.
    298   //
    299   if (RegInfo->hasBasePointer(MF))
    300     TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
    301 
    302   if (needsFrameMoves) {
    303     const DataLayout *TD = MF.getTarget().getDataLayout();
    304     const int StackGrowth = -TD->getPointerSize(0);
    305     unsigned FramePtr = RegInfo->getFrameRegister(MF);
    306 
    307     // An example of the prologue:
    308     //
    309     //     .globl __foo
    310     //     .align 2
    311     //  __foo:
    312     // Ltmp0:
    313     //     .cfi_startproc
    314     //     .cfi_personality 155, ___gxx_personality_v0
    315     // Leh_func_begin:
    316     //     .cfi_lsda 16, Lexception33
    317     //
    318     //     stp  xa,bx, [sp, -#offset]!
    319     //     ...
    320     //     stp  x28, x27, [sp, #offset-32]
    321     //     stp  fp, lr, [sp, #offset-16]
    322     //     add  fp, sp, #offset - 16
    323     //     sub  sp, sp, #1360
    324     //
    325     // The Stack:
    326     //       +-------------------------------------------+
    327     // 10000 | ........ | ........ | ........ | ........ |
    328     // 10004 | ........ | ........ | ........ | ........ |
    329     //       +-------------------------------------------+
    330     // 10008 | ........ | ........ | ........ | ........ |
    331     // 1000c | ........ | ........ | ........ | ........ |
    332     //       +===========================================+
    333     // 10010 |                X28 Register               |
    334     // 10014 |                X28 Register               |
    335     //       +-------------------------------------------+
    336     // 10018 |                X27 Register               |
    337     // 1001c |                X27 Register               |
    338     //       +===========================================+
    339     // 10020 |                Frame Pointer              |
    340     // 10024 |                Frame Pointer              |
    341     //       +-------------------------------------------+
    342     // 10028 |                Link Register              |
    343     // 1002c |                Link Register              |
    344     //       +===========================================+
    345     // 10030 | ........ | ........ | ........ | ........ |
    346     // 10034 | ........ | ........ | ........ | ........ |
    347     //       +-------------------------------------------+
    348     // 10038 | ........ | ........ | ........ | ........ |
    349     // 1003c | ........ | ........ | ........ | ........ |
    350     //       +-------------------------------------------+
    351     //
    352     //     [sp] = 10030        ::    >>initial value<<
    353     //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    354     //     fp = sp == 10020    ::  mov fp, sp
    355     //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    356     //     sp == 10010         ::    >>final value<<
    357     //
    358     // The frame pointer (w29) points to address 10020. If we use an offset of
    359     // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    360     // for w27, and -32 for w28:
    361     //
    362     //  Ltmp1:
    363     //     .cfi_def_cfa w29, 16
    364     //  Ltmp2:
    365     //     .cfi_offset w30, -8
    366     //  Ltmp3:
    367     //     .cfi_offset w29, -16
    368     //  Ltmp4:
    369     //     .cfi_offset w27, -24
    370     //  Ltmp5:
    371     //     .cfi_offset w28, -32
    372 
    373     if (HasFP) {
    374       // Define the current CFA rule to use the provided FP.
    375       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
    376       unsigned CFIIndex = MMI.addFrameInst(
    377           MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
    378       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    379           .addCFIIndex(CFIIndex);
    380 
    381       // Record the location of the stored LR
    382       unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
    383       CFIIndex = MMI.addFrameInst(
    384           MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
    385       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    386           .addCFIIndex(CFIIndex);
    387 
    388       // Record the location of the stored FP
    389       CFIIndex = MMI.addFrameInst(
    390           MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
    391       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    392           .addCFIIndex(CFIIndex);
    393     } else {
    394       // Encode the stack size of the leaf function.
    395       unsigned CFIIndex = MMI.addFrameInst(
    396           MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
    397       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    398           .addCFIIndex(CFIIndex);
    399     }
    400 
    401     // Now emit the moves for whatever callee saved regs we have.
    402     emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
    403   }
    404 }
    405 
    406 static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
    407   for (unsigned i = 0; CSRegs[i]; ++i)
    408     if (Reg == CSRegs[i])
    409       return true;
    410   return false;
    411 }
    412 
    413 static bool isCSRestore(MachineInstr *MI, const MCPhysReg *CSRegs) {
    414   unsigned RtIdx = 0;
    415   if (MI->getOpcode() == AArch64::LDPXpost ||
    416       MI->getOpcode() == AArch64::LDPDpost)
    417     RtIdx = 1;
    418 
    419   if (MI->getOpcode() == AArch64::LDPXpost ||
    420       MI->getOpcode() == AArch64::LDPDpost ||
    421       MI->getOpcode() == AArch64::LDPXi || MI->getOpcode() == AArch64::LDPDi) {
    422     if (!isCalleeSavedRegister(MI->getOperand(RtIdx).getReg(), CSRegs) ||
    423         !isCalleeSavedRegister(MI->getOperand(RtIdx + 1).getReg(), CSRegs) ||
    424         MI->getOperand(RtIdx + 2).getReg() != AArch64::SP)
    425       return false;
    426     return true;
    427   }
    428 
    429   return false;
    430 }
    431 
    432 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
    433                                         MachineBasicBlock &MBB) const {
    434   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
    435   assert(MBBI->isReturn() && "Can only insert epilog into returning blocks");
    436   MachineFrameInfo *MFI = MF.getFrameInfo();
    437   const AArch64InstrInfo *TII =
    438       static_cast<const AArch64InstrInfo *>(MF.getTarget().getInstrInfo());
    439   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    440       MF.getTarget().getRegisterInfo());
    441   DebugLoc DL = MBBI->getDebugLoc();
    442   unsigned RetOpcode = MBBI->getOpcode();
    443 
    444   int NumBytes = MFI->getStackSize();
    445   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    446 
    447   // Initial and residual are named for consitency with the prologue. Note that
    448   // in the epilogue, the residual adjustment is executed first.
    449   uint64_t ArgumentPopSize = 0;
    450   if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
    451     MachineOperand &StackAdjust = MBBI->getOperand(1);
    452 
    453     // For a tail-call in a callee-pops-arguments environment, some or all of
    454     // the stack may actually be in use for the call's arguments, this is
    455     // calculated during LowerCall and consumed here...
    456     ArgumentPopSize = StackAdjust.getImm();
    457   } else {
    458     // ... otherwise the amount to pop is *all* of the argument space,
    459     // conveniently stored in the MachineFunctionInfo by
    460     // LowerFormalArguments. This will, of course, be zero for the C calling
    461     // convention.
    462     ArgumentPopSize = AFI->getArgumentStackToRestore();
    463   }
    464 
    465   // The stack frame should be like below,
    466   //
    467   //      ----------------------                     ---
    468   //      |                    |                      |
    469   //      | BytesInStackArgArea|              CalleeArgStackSize
    470   //      | (NumReusableBytes) |                (of tail call)
    471   //      |                    |                     ---
    472   //      |                    |                      |
    473   //      ---------------------|        ---           |
    474   //      |                    |         |            |
    475   //      |   CalleeSavedReg   |         |            |
    476   //      | (NumRestores * 16) |         |            |
    477   //      |                    |         |            |
    478   //      ---------------------|         |         NumBytes
    479   //      |                    |     StackSize  (StackAdjustUp)
    480   //      |   LocalStackSize   |         |            |
    481   //      | (covering callee   |         |            |
    482   //      |       args)        |         |            |
    483   //      |                    |         |            |
    484   //      ----------------------        ---          ---
    485   //
    486   // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
    487   //             = StackSize + ArgumentPopSize
    488   //
    489   // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
    490   // it as the 2nd argument of AArch64ISD::TC_RETURN.
    491   NumBytes += ArgumentPopSize;
    492 
    493   unsigned NumRestores = 0;
    494   // Move past the restores of the callee-saved registers.
    495   MachineBasicBlock::iterator LastPopI = MBBI;
    496   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    497   if (LastPopI != MBB.begin()) {
    498     do {
    499       ++NumRestores;
    500       --LastPopI;
    501     } while (LastPopI != MBB.begin() && isCSRestore(LastPopI, CSRegs));
    502     if (!isCSRestore(LastPopI, CSRegs)) {
    503       ++LastPopI;
    504       --NumRestores;
    505     }
    506   }
    507   NumBytes -= NumRestores * 16;
    508   assert(NumBytes >= 0 && "Negative stack allocation size!?");
    509 
    510   if (!hasFP(MF)) {
    511     // If this was a redzone leaf function, we don't need to restore the
    512     // stack pointer.
    513     if (!canUseRedZone(MF))
    514       emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
    515                       TII);
    516     return;
    517   }
    518 
    519   // Restore the original stack pointer.
    520   // FIXME: Rather than doing the math here, we should instead just use
    521   // non-post-indexed loads for the restores if we aren't actually going to
    522   // be able to save any instructions.
    523   if (NumBytes || MFI->hasVarSizedObjects())
    524     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
    525                     -(NumRestores - 1) * 16, TII, MachineInstr::NoFlags);
    526 }
    527 
    528 /// getFrameIndexOffset - Returns the displacement from the frame register to
    529 /// the stack frame of the specified index.
    530 int AArch64FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
    531                                               int FI) const {
    532   unsigned FrameReg;
    533   return getFrameIndexReference(MF, FI, FrameReg);
    534 }
    535 
    536 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
    537 /// debug info.  It's the same as what we use for resolving the code-gen
    538 /// references for now.  FIXME: This can go wrong when references are
    539 /// SP-relative and simple call frames aren't used.
    540 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
    541                                                  int FI,
    542                                                  unsigned &FrameReg) const {
    543   return resolveFrameIndexReference(MF, FI, FrameReg);
    544 }
    545 
    546 int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
    547                                                      int FI, unsigned &FrameReg,
    548                                                      bool PreferFP) const {
    549   const MachineFrameInfo *MFI = MF.getFrameInfo();
    550   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    551       MF.getTarget().getRegisterInfo());
    552   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    553   int FPOffset = MFI->getObjectOffset(FI) + 16;
    554   int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
    555   bool isFixed = MFI->isFixedObjectIndex(FI);
    556 
    557   // Use frame pointer to reference fixed objects. Use it for locals if
    558   // there are VLAs (and thus the SP isn't reliable as a base).
    559   // Make sure useFPForScavengingIndex() does the right thing for the emergency
    560   // spill slot.
    561   bool UseFP = false;
    562   if (AFI->hasStackFrame()) {
    563     // Note: Keeping the following as multiple 'if' statements rather than
    564     // merging to a single expression for readability.
    565     //
    566     // Argument access should always use the FP.
    567     if (isFixed) {
    568       UseFP = hasFP(MF);
    569     } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
    570       // Use SP or FP, whichever gives us the best chance of the offset
    571       // being in range for direct access. If the FPOffset is positive,
    572       // that'll always be best, as the SP will be even further away.
    573       // If the FPOffset is negative, we have to keep in mind that the
    574       // available offset range for negative offsets is smaller than for
    575       // positive ones. If we have variable sized objects, we're stuck with
    576       // using the FP regardless, though, as the SP offset is unknown
    577       // and we don't have a base pointer available. If an offset is
    578       // available via the FP and the SP, use whichever is closest.
    579       if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
    580           (FPOffset >= -256 && Offset > -FPOffset))
    581         UseFP = true;
    582     }
    583   }
    584 
    585   if (UseFP) {
    586     FrameReg = RegInfo->getFrameRegister(MF);
    587     return FPOffset;
    588   }
    589 
    590   // Use the base pointer if we have one.
    591   if (RegInfo->hasBasePointer(MF))
    592     FrameReg = RegInfo->getBaseRegister();
    593   else {
    594     FrameReg = AArch64::SP;
    595     // If we're using the red zone for this function, the SP won't actually
    596     // be adjusted, so the offsets will be negative. They're also all
    597     // within range of the signed 9-bit immediate instructions.
    598     if (canUseRedZone(MF))
    599       Offset -= AFI->getLocalStackSize();
    600   }
    601 
    602   return Offset;
    603 }
    604 
    605 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
    606   if (Reg != AArch64::LR)
    607     return getKillRegState(true);
    608 
    609   // LR maybe referred to later by an @llvm.returnaddress intrinsic.
    610   bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
    611   bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
    612   return getKillRegState(LRKill);
    613 }
    614 
    615 bool AArch64FrameLowering::spillCalleeSavedRegisters(
    616     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    617     const std::vector<CalleeSavedInfo> &CSI,
    618     const TargetRegisterInfo *TRI) const {
    619   MachineFunction &MF = *MBB.getParent();
    620   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
    621   unsigned Count = CSI.size();
    622   DebugLoc DL;
    623   assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    624 
    625   if (MI != MBB.end())
    626     DL = MI->getDebugLoc();
    627 
    628   for (unsigned i = 0; i < Count; i += 2) {
    629     unsigned idx = Count - i - 2;
    630     unsigned Reg1 = CSI[idx].getReg();
    631     unsigned Reg2 = CSI[idx + 1].getReg();
    632     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    633     // list to come in sorted by frame index so that we can issue the store
    634     // pair instructions directly. Assert if we see anything otherwise.
    635     //
    636     // The order of the registers in the list is controlled by
    637     // getCalleeSavedRegs(), so they will always be in-order, as well.
    638     assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
    639            "Out of order callee saved regs!");
    640     unsigned StrOpc;
    641     assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    642     assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    643     // Issue sequence of non-sp increment and pi sp spills for cs regs. The
    644     // first spill is a pre-increment that allocates the stack.
    645     // For example:
    646     //    stp     x22, x21, [sp, #-48]!   // addImm(-6)
    647     //    stp     x20, x19, [sp, #16]    // addImm(+2)
    648     //    stp     fp, lr, [sp, #32]      // addImm(+4)
    649     // Rationale: This sequence saves uop updates compared to a sequence of
    650     // pre-increment spills like stp xi,xj,[sp,#-16]!
    651     // Note: Similar rational and sequence for restores in epilog.
    652     if (AArch64::GPR64RegClass.contains(Reg1)) {
    653       assert(AArch64::GPR64RegClass.contains(Reg2) &&
    654              "Expected GPR64 callee-saved register pair!");
    655       // For first spill use pre-increment store.
    656       if (i == 0)
    657         StrOpc = AArch64::STPXpre;
    658       else
    659         StrOpc = AArch64::STPXi;
    660     } else if (AArch64::FPR64RegClass.contains(Reg1)) {
    661       assert(AArch64::FPR64RegClass.contains(Reg2) &&
    662              "Expected FPR64 callee-saved register pair!");
    663       // For first spill use pre-increment store.
    664       if (i == 0)
    665         StrOpc = AArch64::STPDpre;
    666       else
    667         StrOpc = AArch64::STPDi;
    668     } else
    669       llvm_unreachable("Unexpected callee saved register!");
    670     DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
    671                  << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
    672                  << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
    673     // Compute offset: i = 0 => offset = -Count;
    674     //                 i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
    675     const int Offset = (i == 0) ? -Count : i;
    676     assert((Offset >= -64 && Offset <= 63) &&
    677            "Offset out of bounds for STP immediate");
    678     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    679     if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
    680       MIB.addReg(AArch64::SP, RegState::Define);
    681 
    682     MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
    683         .addReg(Reg1, getPrologueDeath(MF, Reg1))
    684         .addReg(AArch64::SP)
    685         .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
    686         .setMIFlag(MachineInstr::FrameSetup);
    687   }
    688   return true;
    689 }
    690 
    691 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    692     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    693     const std::vector<CalleeSavedInfo> &CSI,
    694     const TargetRegisterInfo *TRI) const {
    695   MachineFunction &MF = *MBB.getParent();
    696   const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
    697   unsigned Count = CSI.size();
    698   DebugLoc DL;
    699   assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    700 
    701   if (MI != MBB.end())
    702     DL = MI->getDebugLoc();
    703 
    704   for (unsigned i = 0; i < Count; i += 2) {
    705     unsigned Reg1 = CSI[i].getReg();
    706     unsigned Reg2 = CSI[i + 1].getReg();
    707     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    708     // list to come in sorted by frame index so that we can issue the store
    709     // pair instructions directly. Assert if we see anything otherwise.
    710     assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
    711            "Out of order callee saved regs!");
    712     // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
    713     // the last load is sp-pi post-increment and de-allocates the stack:
    714     // For example:
    715     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    716     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    717     //    ldp     x22, x21, [sp], #48     // addImm(+6)
    718     // Note: see comment in spillCalleeSavedRegisters()
    719     unsigned LdrOpc;
    720 
    721     assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    722     assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    723     if (AArch64::GPR64RegClass.contains(Reg1)) {
    724       assert(AArch64::GPR64RegClass.contains(Reg2) &&
    725              "Expected GPR64 callee-saved register pair!");
    726       if (i == Count - 2)
    727         LdrOpc = AArch64::LDPXpost;
    728       else
    729         LdrOpc = AArch64::LDPXi;
    730     } else if (AArch64::FPR64RegClass.contains(Reg1)) {
    731       assert(AArch64::FPR64RegClass.contains(Reg2) &&
    732              "Expected FPR64 callee-saved register pair!");
    733       if (i == Count - 2)
    734         LdrOpc = AArch64::LDPDpost;
    735       else
    736         LdrOpc = AArch64::LDPDi;
    737     } else
    738       llvm_unreachable("Unexpected callee saved register!");
    739     DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
    740                  << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
    741                  << ", " << CSI[i + 1].getFrameIdx() << ")\n");
    742 
    743     // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
    744     // etc.
    745     const int Offset = (i == Count - 2) ? Count : Count - i - 2;
    746     assert((Offset >= -64 && Offset <= 63) &&
    747            "Offset out of bounds for LDP immediate");
    748     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    749     if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
    750       MIB.addReg(AArch64::SP, RegState::Define);
    751 
    752     MIB.addReg(Reg2, getDefRegState(true))
    753         .addReg(Reg1, getDefRegState(true))
    754         .addReg(AArch64::SP)
    755         .addImm(Offset); // [sp], #offset * 8  or [sp, #offset * 8]
    756                          // where the factor * 8 is implicit
    757   }
    758   return true;
    759 }
    760 
    761 void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
    762     MachineFunction &MF, RegScavenger *RS) const {
    763   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    764       MF.getTarget().getRegisterInfo());
    765   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    766   MachineRegisterInfo *MRI = &MF.getRegInfo();
    767   SmallVector<unsigned, 4> UnspilledCSGPRs;
    768   SmallVector<unsigned, 4> UnspilledCSFPRs;
    769 
    770   // The frame record needs to be created by saving the appropriate registers
    771   if (hasFP(MF)) {
    772     MRI->setPhysRegUsed(AArch64::FP);
    773     MRI->setPhysRegUsed(AArch64::LR);
    774   }
    775 
    776   // Spill the BasePtr if it's used. Do this first thing so that the
    777   // getCalleeSavedRegs() below will get the right answer.
    778   if (RegInfo->hasBasePointer(MF))
    779     MRI->setPhysRegUsed(RegInfo->getBaseRegister());
    780 
    781   // If any callee-saved registers are used, the frame cannot be eliminated.
    782   unsigned NumGPRSpilled = 0;
    783   unsigned NumFPRSpilled = 0;
    784   bool ExtraCSSpill = false;
    785   bool CanEliminateFrame = true;
    786   DEBUG(dbgs() << "*** processFunctionBeforeCalleeSavedScan\nUsed CSRs:");
    787   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    788 
    789   // Check pairs of consecutive callee-saved registers.
    790   for (unsigned i = 0; CSRegs[i]; i += 2) {
    791     assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
    792 
    793     const unsigned OddReg = CSRegs[i];
    794     const unsigned EvenReg = CSRegs[i + 1];
    795     assert((AArch64::GPR64RegClass.contains(OddReg) &&
    796             AArch64::GPR64RegClass.contains(EvenReg)) ^
    797                (AArch64::FPR64RegClass.contains(OddReg) &&
    798                 AArch64::FPR64RegClass.contains(EvenReg)) &&
    799            "Register class mismatch!");
    800 
    801     const bool OddRegUsed = MRI->isPhysRegUsed(OddReg);
    802     const bool EvenRegUsed = MRI->isPhysRegUsed(EvenReg);
    803 
    804     // Early exit if none of the registers in the register pair is actually
    805     // used.
    806     if (!OddRegUsed && !EvenRegUsed) {
    807       if (AArch64::GPR64RegClass.contains(OddReg)) {
    808         UnspilledCSGPRs.push_back(OddReg);
    809         UnspilledCSGPRs.push_back(EvenReg);
    810       } else {
    811         UnspilledCSFPRs.push_back(OddReg);
    812         UnspilledCSFPRs.push_back(EvenReg);
    813       }
    814       continue;
    815     }
    816 
    817     unsigned Reg = AArch64::NoRegister;
    818     // If only one of the registers of the register pair is used, make sure to
    819     // mark the other one as used as well.
    820     if (OddRegUsed ^ EvenRegUsed) {
    821       // Find out which register is the additional spill.
    822       Reg = OddRegUsed ? EvenReg : OddReg;
    823       MRI->setPhysRegUsed(Reg);
    824     }
    825 
    826     DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    827     DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
    828 
    829     assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
    830             (RegInfo->getEncodingValue(OddReg) + 1 ==
    831              RegInfo->getEncodingValue(EvenReg))) &&
    832            "Register pair of non-adjacent registers!");
    833     if (AArch64::GPR64RegClass.contains(OddReg)) {
    834       NumGPRSpilled += 2;
    835       // If it's not a reserved register, we can use it in lieu of an
    836       // emergency spill slot for the register scavenger.
    837       // FIXME: It would be better to instead keep looking and choose another
    838       // unspilled register that isn't reserved, if there is one.
    839       if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
    840         ExtraCSSpill = true;
    841     } else
    842       NumFPRSpilled += 2;
    843 
    844     CanEliminateFrame = false;
    845   }
    846 
    847   // FIXME: Set BigStack if any stack slot references may be out of range.
    848   // For now, just conservatively guestimate based on unscaled indexing
    849   // range. We'll end up allocating an unnecessary spill slot a lot, but
    850   // realistically that's not a big deal at this stage of the game.
    851   // The CSR spill slots have not been allocated yet, so estimateStackSize
    852   // won't include them.
    853   MachineFrameInfo *MFI = MF.getFrameInfo();
    854   unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
    855   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
    856   bool BigStack = (CFSize >= 256);
    857   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    858     AFI->setHasStackFrame(true);
    859 
    860   // Estimate if we might need to scavenge a register at some point in order
    861   // to materialize a stack offset. If so, either spill one additional
    862   // callee-saved register or reserve a special spill slot to facilitate
    863   // register scavenging. If we already spilled an extra callee-saved register
    864   // above to keep the number of spills even, we don't need to do anything else
    865   // here.
    866   if (BigStack && !ExtraCSSpill) {
    867 
    868     // If we're adding a register to spill here, we have to add two of them
    869     // to keep the number of regs to spill even.
    870     assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    871     unsigned Count = 0;
    872     while (!UnspilledCSGPRs.empty() && Count < 2) {
    873       unsigned Reg = UnspilledCSGPRs.back();
    874       UnspilledCSGPRs.pop_back();
    875       DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
    876                    << " to get a scratch register.\n");
    877       MRI->setPhysRegUsed(Reg);
    878       ExtraCSSpill = true;
    879       ++Count;
    880     }
    881 
    882     // If we didn't find an extra callee-saved register to spill, create
    883     // an emergency spill slot.
    884     if (!ExtraCSSpill) {
    885       const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
    886       int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
    887       RS->addScavengingFrameIndex(FI);
    888       DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
    889                    << " as the emergency spill slot.\n");
    890     }
    891   }
    892 }
    893