Home | History | Annotate | Download | only in AArch64
      1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file contains the AArch64 implementation of TargetFrameLowering class.
     11 //
     12 // On AArch64, stack frames are structured as follows:
     13 //
     14 // The stack grows downward.
     15 //
     16 // All of the individual frame areas on the frame below are optional, i.e. it's
     17 // possible to create a function so that the particular area isn't present
     18 // in the frame.
     19 //
     20 // At function entry, the "frame" looks as follows:
     21 //
     22 // |                                   | Higher address
     23 // |-----------------------------------|
     24 // |                                   |
     25 // | arguments passed on the stack     |
     26 // |                                   |
     27 // |-----------------------------------| <- sp
     28 // |                                   | Lower address
     29 //
     30 //
     31 // After the prologue has run, the frame has the following general structure.
     32 // Note that this doesn't depict the case where a red-zone is used. Also,
     33 // technically the last frame area (VLAs) doesn't get created until in the
     34 // main function body, after the prologue is run. However, it's depicted here
     35 // for completeness.
     36 //
     37 // |                                   | Higher address
     38 // |-----------------------------------|
     39 // |                                   |
     40 // | arguments passed on the stack     |
     41 // |                                   |
     42 // |-----------------------------------|
     43 // |                                   |
     44 // | prev_fp, prev_lr                  |
     45 // | (a.k.a. "frame record")           |
     46 // |-----------------------------------| <- fp(=x29)
     47 // |                                   |
     48 // | other callee-saved registers      |
     49 // |                                   |
     50 // |-----------------------------------|
     51 // |.empty.space.to.make.part.below....|
     52 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
     53 // |.the.standard.16-byte.alignment....|  compile time; if present)
     54 // |-----------------------------------|
     55 // |                                   |
     56 // | local variables of fixed size     |
     57 // | including spill slots             |
     58 // |-----------------------------------| <- bp(not defined by ABI,
     59 // |.variable-sized.local.variables....|       LLVM chooses X19)
     60 // |.(VLAs)............................| (size of this area is unknown at
     61 // |...................................|  compile time)
     62 // |-----------------------------------| <- sp
     63 // |                                   | Lower address
     64 //
     65 //
     66 // To access the data in a frame, at-compile time, a constant offset must be
     67 // computable from one of the pointers (fp, bp, sp) to access it. The size
     68 // of the areas with a dotted background cannot be computed at compile-time
     69 // if they are present, making it required to have all three of fp, bp and
     70 // sp to be set up to be able to access all contents in the frame areas,
     71 // assuming all of the frame areas are non-empty.
     72 //
     73 // For most functions, some of the frame areas are empty. For those functions,
     74 // it may not be necessary to set up fp or bp:
     75 // * A base pointer is definitely needed when there are both VLAs and local
     76 //   variables with more-than-default alignment requirements.
     77 // * A frame pointer is definitely needed when there are local variables with
     78 //   more-than-default alignment requirements.
     79 //
     80 // In some cases when a base pointer is not strictly needed, it is generated
     81 // anyway when offsets from the frame pointer to access local variables become
     82 // so large that the offset can't be encoded in the immediate fields of loads
     83 // or stores.
     84 //
     85 // FIXME: also explain the redzone concept.
     86 // FIXME: also explain the concept of reserved call frames.
     87 //
     88 //===----------------------------------------------------------------------===//
     89 
     90 #include "AArch64FrameLowering.h"
     91 #include "AArch64InstrInfo.h"
     92 #include "AArch64MachineFunctionInfo.h"
     93 #include "AArch64Subtarget.h"
     94 #include "AArch64TargetMachine.h"
     95 #include "llvm/ADT/Statistic.h"
     96 #include "llvm/CodeGen/MachineFrameInfo.h"
     97 #include "llvm/CodeGen/MachineFunction.h"
     98 #include "llvm/CodeGen/MachineInstrBuilder.h"
     99 #include "llvm/CodeGen/MachineModuleInfo.h"
    100 #include "llvm/CodeGen/MachineRegisterInfo.h"
    101 #include "llvm/CodeGen/RegisterScavenging.h"
    102 #include "llvm/IR/DataLayout.h"
    103 #include "llvm/IR/Function.h"
    104 #include "llvm/Support/CommandLine.h"
    105 #include "llvm/Support/Debug.h"
    106 #include "llvm/Support/raw_ostream.h"
    107 
    108 using namespace llvm;
    109 
    110 #define DEBUG_TYPE "frame-info"
    111 
    112 static cl::opt<bool> EnableRedZone("aarch64-redzone",
    113                                    cl::desc("enable use of redzone on AArch64"),
    114                                    cl::init(false), cl::Hidden);
    115 
    116 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
    117 
    118 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
    119   if (!EnableRedZone)
    120     return false;
    121   // Don't use the red zone if the function explicitly asks us not to.
    122   // This is typically used for kernel code.
    123   if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
    124     return false;
    125 
    126   const MachineFrameInfo *MFI = MF.getFrameInfo();
    127   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    128   unsigned NumBytes = AFI->getLocalStackSize();
    129 
    130   // Note: currently hasFP() is always true for hasCalls(), but that's an
    131   // implementation detail of the current code, not a strict requirement,
    132   // so stay safe here and check both.
    133   if (MFI->hasCalls() || hasFP(MF) || NumBytes > 128)
    134     return false;
    135   return true;
    136 }
    137 
    138 /// hasFP - Return true if the specified function should have a dedicated frame
    139 /// pointer register.
    140 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
    141   const MachineFrameInfo *MFI = MF.getFrameInfo();
    142   const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
    143   return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
    144           MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
    145           MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
    146 }
    147 
    148 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
    149 /// not required, we reserve argument space for call sites in the function
    150 /// immediately on entry to the current function.  This eliminates the need for
    151 /// add/sub sp brackets around call sites.  Returns true if the call frame is
    152 /// included as part of the stack frame.
    153 bool
    154 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
    155   return !MF.getFrameInfo()->hasVarSizedObjects();
    156 }
    157 
    158 void AArch64FrameLowering::eliminateCallFramePseudoInstr(
    159     MachineFunction &MF, MachineBasicBlock &MBB,
    160     MachineBasicBlock::iterator I) const {
    161   const AArch64InstrInfo *TII =
    162       static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
    163   DebugLoc DL = I->getDebugLoc();
    164   unsigned Opc = I->getOpcode();
    165   bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
    166   uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
    167 
    168   const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
    169   if (!TFI->hasReservedCallFrame(MF)) {
    170     unsigned Align = getStackAlignment();
    171 
    172     int64_t Amount = I->getOperand(0).getImm();
    173     Amount = RoundUpToAlignment(Amount, Align);
    174     if (!IsDestroy)
    175       Amount = -Amount;
    176 
    177     // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    178     // doesn't have to pop anything), then the first operand will be zero too so
    179     // this adjustment is a no-op.
    180     if (CalleePopAmount == 0) {
    181       // FIXME: in-function stack adjustment for calls is limited to 24-bits
    182       // because there's no guaranteed temporary register available.
    183       //
    184       // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
    185       // 1) For offset <= 12-bit, we use LSL #0
    186       // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
    187       // LSL #0, and the other uses LSL #12.
    188       //
    189       // Mostly call frames will be allocated at the start of a function so
    190       // this is OK, but it is a limitation that needs dealing with.
    191       assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
    192       emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    193     }
    194   } else if (CalleePopAmount != 0) {
    195     // If the calling convention demands that the callee pops arguments from the
    196     // stack, we want to add it back if we have a reserved call frame.
    197     assert(CalleePopAmount < 0xffffff && "call frame too large");
    198     emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
    199                     TII);
    200   }
    201   MBB.erase(I);
    202 }
    203 
    204 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    205     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    206     unsigned FramePtr) const {
    207   MachineFunction &MF = *MBB.getParent();
    208   MachineFrameInfo *MFI = MF.getFrameInfo();
    209   MachineModuleInfo &MMI = MF.getMMI();
    210   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
    211   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    212   DebugLoc DL = MBB.findDebugLoc(MBBI);
    213 
    214   // Add callee saved registers to move list.
    215   const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
    216   if (CSI.empty())
    217     return;
    218 
    219   const DataLayout &TD = MF.getDataLayout();
    220   bool HasFP = hasFP(MF);
    221 
    222   // Calculate amount of bytes used for return address storing.
    223   int stackGrowth = -TD.getPointerSize(0);
    224 
    225   // Calculate offsets.
    226   int64_t saveAreaOffset = (HasFP ? 2 : 1) * stackGrowth;
    227   unsigned TotalSkipped = 0;
    228   for (const auto &Info : CSI) {
    229     unsigned Reg = Info.getReg();
    230     int64_t Offset = MFI->getObjectOffset(Info.getFrameIdx()) -
    231                      getOffsetOfLocalArea() + saveAreaOffset;
    232 
    233     // Don't output a new CFI directive if we're re-saving the frame pointer or
    234     // link register. This happens when the PrologEpilogInserter has inserted an
    235     // extra "STP" of the frame pointer and link register -- the "emitPrologue"
    236     // method automatically generates the directives when frame pointers are
    237     // used. If we generate CFI directives for the extra "STP"s, the linker will
    238     // lose track of the correct values for the frame pointer and link register.
    239     if (HasFP && (FramePtr == Reg || Reg == AArch64::LR)) {
    240       TotalSkipped += stackGrowth;
    241       continue;
    242     }
    243 
    244     unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    245     unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
    246         nullptr, DwarfReg, Offset - TotalSkipped));
    247     BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    248         .addCFIIndex(CFIIndex)
    249         .setMIFlags(MachineInstr::FrameSetup);
    250   }
    251 }
    252 
    253 /// Get FPOffset by analyzing the first instruction.
    254 static int getFPOffsetInPrologue(MachineInstr *MBBI) {
    255   // First instruction must a) allocate the stack  and b) have an immediate
    256   // that is a multiple of -2.
    257   assert(((MBBI->getOpcode() == AArch64::STPXpre ||
    258            MBBI->getOpcode() == AArch64::STPDpre) &&
    259           MBBI->getOperand(3).getReg() == AArch64::SP &&
    260           MBBI->getOperand(4).getImm() < 0 &&
    261           (MBBI->getOperand(4).getImm() & 1) == 0));
    262 
    263   // Frame pointer is fp = sp - 16. Since the  STPXpre subtracts the space
    264   // required for the callee saved register area we get the frame pointer
    265   // by addding that offset - 16 = -getImm()*8 - 2*8 = -(getImm() + 2) * 8.
    266   int FPOffset = -(MBBI->getOperand(4).getImm() + 2) * 8;
    267   assert(FPOffset >= 0 && "Bad Framepointer Offset");
    268   return FPOffset;
    269 }
    270 
    271 static bool isCSSave(MachineInstr *MBBI) {
    272   return MBBI->getOpcode() == AArch64::STPXi ||
    273          MBBI->getOpcode() == AArch64::STPDi ||
    274          MBBI->getOpcode() == AArch64::STPXpre ||
    275          MBBI->getOpcode() == AArch64::STPDpre;
    276 }
    277 
    278 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
    279                                         MachineBasicBlock &MBB) const {
    280   MachineBasicBlock::iterator MBBI = MBB.begin();
    281   const MachineFrameInfo *MFI = MF.getFrameInfo();
    282   const Function *Fn = MF.getFunction();
    283   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    284   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
    285   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
    286   MachineModuleInfo &MMI = MF.getMMI();
    287   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    288   bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
    289   bool HasFP = hasFP(MF);
    290 
    291   // Debug location must be unknown since the first debug location is used
    292   // to determine the end of the prologue.
    293   DebugLoc DL;
    294 
    295   // All calls are tail calls in GHC calling conv, and functions have no
    296   // prologue/epilogue.
    297   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    298     return;
    299 
    300   int NumBytes = (int)MFI->getStackSize();
    301   if (!AFI->hasStackFrame()) {
    302     assert(!HasFP && "unexpected function without stack frame but with FP");
    303 
    304     // All of the stack allocation is for locals.
    305     AFI->setLocalStackSize(NumBytes);
    306 
    307     // Label used to tie together the PROLOG_LABEL and the MachineMoves.
    308     MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
    309 
    310     // REDZONE: If the stack size is less than 128 bytes, we don't need
    311     // to actually allocate.
    312     if (NumBytes && !canUseRedZone(MF)) {
    313       emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
    314                       MachineInstr::FrameSetup);
    315 
    316       // Encode the stack size of the leaf function.
    317       unsigned CFIIndex = MMI.addFrameInst(
    318           MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
    319       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    320           .addCFIIndex(CFIIndex)
    321           .setMIFlags(MachineInstr::FrameSetup);
    322     } else if (NumBytes) {
    323       ++NumRedZoneFunctions;
    324     }
    325 
    326     return;
    327   }
    328 
    329   // Only set up FP if we actually need to.
    330   int FPOffset = 0;
    331   if (HasFP)
    332     FPOffset = getFPOffsetInPrologue(MBBI);
    333 
    334   // Move past the saves of the callee-saved registers.
    335   while (isCSSave(MBBI)) {
    336     ++MBBI;
    337     NumBytes -= 16;
    338   }
    339   assert(NumBytes >= 0 && "Negative stack allocation size!?");
    340   if (HasFP) {
    341     // Issue    sub fp, sp, FPOffset or
    342     //          mov fp,sp          when FPOffset is zero.
    343     // Note: All stores of callee-saved registers are marked as "FrameSetup".
    344     // This code marks the instruction(s) that set the FP also.
    345     emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
    346                     MachineInstr::FrameSetup);
    347   }
    348 
    349   // All of the remaining stack allocations are for locals.
    350   AFI->setLocalStackSize(NumBytes);
    351 
    352   // Allocate space for the rest of the frame.
    353 
    354   const unsigned Alignment = MFI->getMaxAlignment();
    355   const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    356   unsigned scratchSPReg = AArch64::SP;
    357   if (NumBytes && NeedsRealignment) {
    358     // Use the first callee-saved register as a scratch register.
    359     scratchSPReg = AArch64::X9;
    360   }
    361 
    362   // If we're a leaf function, try using the red zone.
    363   if (NumBytes && !canUseRedZone(MF))
    364     // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
    365     // the correct value here, as NumBytes also includes padding bytes,
    366     // which shouldn't be counted here.
    367     emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
    368                     MachineInstr::FrameSetup);
    369 
    370   if (NumBytes && NeedsRealignment) {
    371     const unsigned NrBitsToZero = countTrailingZeros(Alignment);
    372     assert(NrBitsToZero > 1);
    373     assert(scratchSPReg != AArch64::SP);
    374 
    375     // SUB X9, SP, NumBytes
    376     //   -- X9 is temporary register, so shouldn't contain any live data here,
    377     //   -- free to use. This is already produced by emitFrameOffset above.
    378     // AND SP, X9, 0b11111...0000
    379     // The logical immediates have a non-trivial encoding. The following
    380     // formula computes the encoded immediate with all ones but
    381     // NrBitsToZero zero bits as least significant bits.
    382     uint32_t andMaskEncoded =
    383         (1                   <<12) // = N
    384       | ((64-NrBitsToZero)   << 6) // immr
    385       | ((64-NrBitsToZero-1) << 0) // imms
    386       ;
    387     BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
    388       .addReg(scratchSPReg, RegState::Kill)
    389       .addImm(andMaskEncoded);
    390   }
    391 
    392   // If we need a base pointer, set it up here. It's whatever the value of the
    393   // stack pointer is at this point. Any variable size objects will be allocated
    394   // after this, so we can still use the base pointer to reference locals.
    395   //
    396   // FIXME: Clarify FrameSetup flags here.
    397   // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
    398   // needed.
    399   if (RegInfo->hasBasePointer(MF)) {
    400     TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
    401                      false);
    402   }
    403 
    404   if (needsFrameMoves) {
    405     const DataLayout &TD = MF.getDataLayout();
    406     const int StackGrowth = -TD.getPointerSize(0);
    407     unsigned FramePtr = RegInfo->getFrameRegister(MF);
    408     // An example of the prologue:
    409     //
    410     //     .globl __foo
    411     //     .align 2
    412     //  __foo:
    413     // Ltmp0:
    414     //     .cfi_startproc
    415     //     .cfi_personality 155, ___gxx_personality_v0
    416     // Leh_func_begin:
    417     //     .cfi_lsda 16, Lexception33
    418     //
    419     //     stp  xa,bx, [sp, -#offset]!
    420     //     ...
    421     //     stp  x28, x27, [sp, #offset-32]
    422     //     stp  fp, lr, [sp, #offset-16]
    423     //     add  fp, sp, #offset - 16
    424     //     sub  sp, sp, #1360
    425     //
    426     // The Stack:
    427     //       +-------------------------------------------+
    428     // 10000 | ........ | ........ | ........ | ........ |
    429     // 10004 | ........ | ........ | ........ | ........ |
    430     //       +-------------------------------------------+
    431     // 10008 | ........ | ........ | ........ | ........ |
    432     // 1000c | ........ | ........ | ........ | ........ |
    433     //       +===========================================+
    434     // 10010 |                X28 Register               |
    435     // 10014 |                X28 Register               |
    436     //       +-------------------------------------------+
    437     // 10018 |                X27 Register               |
    438     // 1001c |                X27 Register               |
    439     //       +===========================================+
    440     // 10020 |                Frame Pointer              |
    441     // 10024 |                Frame Pointer              |
    442     //       +-------------------------------------------+
    443     // 10028 |                Link Register              |
    444     // 1002c |                Link Register              |
    445     //       +===========================================+
    446     // 10030 | ........ | ........ | ........ | ........ |
    447     // 10034 | ........ | ........ | ........ | ........ |
    448     //       +-------------------------------------------+
    449     // 10038 | ........ | ........ | ........ | ........ |
    450     // 1003c | ........ | ........ | ........ | ........ |
    451     //       +-------------------------------------------+
    452     //
    453     //     [sp] = 10030        ::    >>initial value<<
    454     //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    455     //     fp = sp == 10020    ::  mov fp, sp
    456     //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    457     //     sp == 10010         ::    >>final value<<
    458     //
    459     // The frame pointer (w29) points to address 10020. If we use an offset of
    460     // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    461     // for w27, and -32 for w28:
    462     //
    463     //  Ltmp1:
    464     //     .cfi_def_cfa w29, 16
    465     //  Ltmp2:
    466     //     .cfi_offset w30, -8
    467     //  Ltmp3:
    468     //     .cfi_offset w29, -16
    469     //  Ltmp4:
    470     //     .cfi_offset w27, -24
    471     //  Ltmp5:
    472     //     .cfi_offset w28, -32
    473 
    474     if (HasFP) {
    475       // Define the current CFA rule to use the provided FP.
    476       unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
    477       unsigned CFIIndex = MMI.addFrameInst(
    478           MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
    479       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    480           .addCFIIndex(CFIIndex)
    481           .setMIFlags(MachineInstr::FrameSetup);
    482 
    483       // Record the location of the stored LR
    484       unsigned LR = RegInfo->getDwarfRegNum(AArch64::LR, true);
    485       CFIIndex = MMI.addFrameInst(
    486           MCCFIInstruction::createOffset(nullptr, LR, StackGrowth));
    487       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    488           .addCFIIndex(CFIIndex)
    489           .setMIFlags(MachineInstr::FrameSetup);
    490 
    491       // Record the location of the stored FP
    492       CFIIndex = MMI.addFrameInst(
    493           MCCFIInstruction::createOffset(nullptr, Reg, 2 * StackGrowth));
    494       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    495           .addCFIIndex(CFIIndex)
    496           .setMIFlags(MachineInstr::FrameSetup);
    497     } else {
    498       // Encode the stack size of the leaf function.
    499       unsigned CFIIndex = MMI.addFrameInst(
    500           MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
    501       BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    502           .addCFIIndex(CFIIndex)
    503           .setMIFlags(MachineInstr::FrameSetup);
    504     }
    505 
    506     // Now emit the moves for whatever callee saved regs we have.
    507     emitCalleeSavedFrameMoves(MBB, MBBI, FramePtr);
    508   }
    509 }
    510 
    511 static bool isCalleeSavedRegister(unsigned Reg, const MCPhysReg *CSRegs) {
    512   for (unsigned i = 0; CSRegs[i]; ++i)
    513     if (Reg == CSRegs[i])
    514       return true;
    515   return false;
    516 }
    517 
    518 /// Checks whether the given instruction restores callee save registers
    519 /// and if so returns how many.
    520 static unsigned getNumCSRestores(MachineInstr &MI, const MCPhysReg *CSRegs) {
    521   unsigned RtIdx = 0;
    522   switch (MI.getOpcode()) {
    523   case AArch64::LDPXpost:
    524   case AArch64::LDPDpost:
    525     RtIdx = 1;
    526     // FALLTHROUGH
    527   case AArch64::LDPXi:
    528   case AArch64::LDPDi:
    529     if (!isCalleeSavedRegister(MI.getOperand(RtIdx).getReg(), CSRegs) ||
    530         !isCalleeSavedRegister(MI.getOperand(RtIdx + 1).getReg(), CSRegs) ||
    531         MI.getOperand(RtIdx + 2).getReg() != AArch64::SP)
    532       return 0;
    533     return 2;
    534   }
    535   return 0;
    536 }
    537 
    538 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
    539                                         MachineBasicBlock &MBB) const {
    540   MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
    541   MachineFrameInfo *MFI = MF.getFrameInfo();
    542   const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
    543   const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
    544   const TargetInstrInfo *TII = Subtarget.getInstrInfo();
    545   DebugLoc DL;
    546   bool IsTailCallReturn = false;
    547   if (MBB.end() != MBBI) {
    548     DL = MBBI->getDebugLoc();
    549     unsigned RetOpcode = MBBI->getOpcode();
    550     IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
    551       RetOpcode == AArch64::TCRETURNri;
    552   }
    553   int NumBytes = MFI->getStackSize();
    554   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    555 
    556   // All calls are tail calls in GHC calling conv, and functions have no
    557   // prologue/epilogue.
    558   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    559     return;
    560 
    561   // Initial and residual are named for consistency with the prologue. Note that
    562   // in the epilogue, the residual adjustment is executed first.
    563   uint64_t ArgumentPopSize = 0;
    564   if (IsTailCallReturn) {
    565     MachineOperand &StackAdjust = MBBI->getOperand(1);
    566 
    567     // For a tail-call in a callee-pops-arguments environment, some or all of
    568     // the stack may actually be in use for the call's arguments, this is
    569     // calculated during LowerCall and consumed here...
    570     ArgumentPopSize = StackAdjust.getImm();
    571   } else {
    572     // ... otherwise the amount to pop is *all* of the argument space,
    573     // conveniently stored in the MachineFunctionInfo by
    574     // LowerFormalArguments. This will, of course, be zero for the C calling
    575     // convention.
    576     ArgumentPopSize = AFI->getArgumentStackToRestore();
    577   }
    578 
    579   // The stack frame should be like below,
    580   //
    581   //      ----------------------                     ---
    582   //      |                    |                      |
    583   //      | BytesInStackArgArea|              CalleeArgStackSize
    584   //      | (NumReusableBytes) |                (of tail call)
    585   //      |                    |                     ---
    586   //      |                    |                      |
    587   //      ---------------------|        ---           |
    588   //      |                    |         |            |
    589   //      |   CalleeSavedReg   |         |            |
    590   //      | (NumRestores * 8)  |         |            |
    591   //      |                    |         |            |
    592   //      ---------------------|         |         NumBytes
    593   //      |                    |     StackSize  (StackAdjustUp)
    594   //      |   LocalStackSize   |         |            |
    595   //      | (covering callee   |         |            |
    596   //      |       args)        |         |            |
    597   //      |                    |         |            |
    598   //      ----------------------        ---          ---
    599   //
    600   // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
    601   //             = StackSize + ArgumentPopSize
    602   //
    603   // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
    604   // it as the 2nd argument of AArch64ISD::TC_RETURN.
    605   NumBytes += ArgumentPopSize;
    606 
    607   unsigned NumRestores = 0;
    608   // Move past the restores of the callee-saved registers.
    609   MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
    610   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    611   MachineBasicBlock::iterator Begin = MBB.begin();
    612   while (LastPopI != Begin) {
    613     --LastPopI;
    614     unsigned Restores = getNumCSRestores(*LastPopI, CSRegs);
    615     NumRestores += Restores;
    616     if (Restores == 0) {
    617       ++LastPopI;
    618       break;
    619     }
    620   }
    621   NumBytes -= NumRestores * 8;
    622   assert(NumBytes >= 0 && "Negative stack allocation size!?");
    623 
    624   if (!hasFP(MF)) {
    625     // If this was a redzone leaf function, we don't need to restore the
    626     // stack pointer.
    627     if (!canUseRedZone(MF))
    628       emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
    629                       TII);
    630     return;
    631   }
    632 
    633   // Restore the original stack pointer.
    634   // FIXME: Rather than doing the math here, we should instead just use
    635   // non-post-indexed loads for the restores if we aren't actually going to
    636   // be able to save any instructions.
    637   if (NumBytes || MFI->hasVarSizedObjects())
    638     emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
    639                     -(NumRestores - 2) * 8, TII, MachineInstr::NoFlags);
    640 }
    641 
    642 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
    643 /// debug info.  It's the same as what we use for resolving the code-gen
    644 /// references for now.  FIXME: This can go wrong when references are
    645 /// SP-relative and simple call frames aren't used.
        ///
        /// This is a thin forwarder to resolveFrameIndexReference(); the PreferFP
        /// argument of that function is not passed here, so whatever default its
        /// declaration supplies is used.
        ///
        /// \param MF        Function whose frame is being queried.
        /// \param FI        Frame index of the stack object to reference.
        /// \param FrameReg  Out-parameter; receives the base register chosen by
        ///                  resolveFrameIndexReference().
        /// \returns The offset to apply to \p FrameReg to reach the object.
    646 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
    647                                                  int FI,
    648                                                  unsigned &FrameReg) const {
    649   return resolveFrameIndexReference(MF, FI, FrameReg);
    650 }
    651 
    /// resolveFrameIndexReference - Pick a base register for frame index \p FI
    /// and return the offset of the object relative to that register.
    ///
    /// Two candidate offsets are computed up front: one relative to the frame
    /// pointer (object offset + 16) and one relative to the stack pointer
    /// (object offset + stack size). The base register is then chosen as:
    ///   - the frame register, when the function has a stack frame and either
    ///     the object is fixed and hasFP(MF) holds, or the object is a local,
    ///     hasFP(MF) holds, there is no base pointer and no stack realignment,
    ///     and the FP-relative form is preferred/required (see below);
    ///   - otherwise the base pointer, if the function has one;
    ///   - otherwise SP, with the offset lowered by the local stack size when
    ///     the red zone is in use.
    ///
    /// \param MF        Function whose frame is being queried.
    /// \param FI        Frame index of the stack object.
    /// \param FrameReg  Out-parameter; set to the chosen base register.
    /// \param PreferFP  When true, a local that may legally use the FP does so
    ///                  unconditionally, skipping the "closest offset" heuristic.
    /// \returns The offset of the object from \p FrameReg.
    652 int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
    653                                                      int FI, unsigned &FrameReg,
    654                                                      bool PreferFP) const {
    655   const MachineFrameInfo *MFI = MF.getFrameInfo();
    656   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    657       MF.getSubtarget().getRegisterInfo());
    658   const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
          // NOTE(review): the +16 presumably accounts for the 16-byte FP/LR frame
          // record sitting between FP and the object offsets -- confirm against
          // emitPrologue().
    659   int FPOffset = MFI->getObjectOffset(FI) + 16;
          // SP-relative candidate: object offset rebased by the total stack size.
    660   int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
    661   bool isFixed = MFI->isFixedObjectIndex(FI);
    662 
    663   // Use frame pointer to reference fixed objects. Use it for locals if
    664   // there are VLAs or a dynamically realigned SP (and thus the SP isn't
    665   // reliable as a base). Make sure useFPForScavengingIndex() does the
    666   // right thing for the emergency spill slot.
    667   bool UseFP = false;
    668   if (AFI->hasStackFrame()) {
    669     // Note: Keeping the following as multiple 'if' statements rather than
    670     // merging to a single expression for readability.
    671     //
    672     // Argument access should always use the FP.
            // (That is, whenever the function actually has a frame pointer.)
    673     if (isFixed) {
    674       UseFP = hasFP(MF);
    675     } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
    676                !RegInfo->needsStackRealignment(MF)) {
    677       // Use SP or FP, whichever gives us the best chance of the offset
    678       // being in range for direct access. If the FPOffset is positive,
    679       // that'll always be best, as the SP will be even further away.
    680       // If the FPOffset is negative, we have to keep in mind that the
    681       // available offset range for negative offsets is smaller than for
    682       // positive ones. If we have variable sized objects, we're stuck with
    683       // using the FP regardless, though, as the SP offset is unknown
    684       // and we don't have a base pointer available. If an offset is
    685       // available via the FP and the SP, use whichever is closest.
    686       if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
    687           (FPOffset >= -256 && Offset > -FPOffset))
    688         UseFP = true;
    689     }
    690   }
    691 
          // Sanity check: with stack realignment only fixed (argument) objects may
          // have ended up FP-relative.
    692   assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
    693          "In the presence of dynamic stack pointer realignment, "
    694          "non-argument objects cannot be accessed through the frame pointer");
    695 
    696   if (UseFP) {
    697     FrameReg = RegInfo->getFrameRegister(MF);
    698     return FPOffset;
    699   }
    700 
    701   // Use the base pointer if we have one.
    702   if (RegInfo->hasBasePointer(MF))
    703     FrameReg = RegInfo->getBaseRegister();
    704   else {
    705     FrameReg = AArch64::SP;
    706     // If we're using the red zone for this function, the SP won't actually
    707     // be adjusted, so the offsets will be negative. They're also all
    708     // within range of the signed 9-bit immediate instructions.
    709     if (canUseRedZone(MF))
    710       Offset -= AFI->getLocalStackSize();
    711   }
    712 
    713   return Offset;
    714 }
    715 
    /// getPrologueDeath - Return the kill-state operand flag to attach to \p Reg
    /// when it is stored by a prologue spill.
    ///
    /// Every register other than LR is marked killed by its spill. LR is marked
    /// killed unless it is both a live-in of the function and the function's
    /// return address is taken, in which case it must stay live after the spill.
    716 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
    717   if (Reg != AArch64::LR)
    718     return getKillRegState(true);
    719 
    720   // LR maybe referred to later by an @llvm.returnaddress intrinsic.
    721   bool LRLiveIn = MF.getRegInfo().isLiveIn(AArch64::LR);
    722   bool LRKill = !(LRLiveIn && MF.getFrameInfo()->isReturnAddressTaken());
    723   return getKillRegState(LRKill);
    724 }
    725 
    /// spillCalleeSavedRegisters - Emit the prologue store-pair instructions that
    /// save the callee-saved registers listed in \p CSI.
    ///
    /// The registers are processed two at a time, walking \p CSI from its last
    /// pair to its first. The first store emitted is a pre-indexed STP that also
    /// adjusts SP by -Count slots; every later store is a plain STP at a positive
    /// slot offset from the new SP. Each stored register is added to \p MBB's
    /// live-in list and each instruction is flagged FrameSetup.
    ///
    /// \param MBB  Block receiving the spill code.
    /// \param MI   Insertion point; new instructions are inserted before it.
    /// \param CSI  Callee-saved register list; must have an even number of
    ///             entries with consecutive frame indices within each pair.
    /// \param TRI  Register info, used here only for debug output.
    /// \returns Always true.
    726 bool AArch64FrameLowering::spillCalleeSavedRegisters(
    727     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    728     const std::vector<CalleeSavedInfo> &CSI,
    729     const TargetRegisterInfo *TRI) const {
    730   MachineFunction &MF = *MBB.getParent();
    731   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    732   unsigned Count = CSI.size();
          // DL is left default-constructed, so the spills carry no debug location.
    733   DebugLoc DL;
    734   assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    735 
    736   for (unsigned i = 0; i < Count; i += 2) {
            // Walk the list back to front: i == 0 handles the last pair in CSI.
    737     unsigned idx = Count - i - 2;
    738     unsigned Reg1 = CSI[idx].getReg();
    739     unsigned Reg2 = CSI[idx + 1].getReg();
    740     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    741     // list to come in sorted by frame index so that we can issue the store
    742     // pair instructions directly. Assert if we see anything otherwise.
    743     //
    744     // The order of the registers in the list is controlled by
    745     // getCalleeSavedRegs(), so they will always be in-order, as well.
    746     assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() &&
    747            "Out of order callee saved regs!");
    748     unsigned StrOpc;
            // NOTE(review): the Count parity assert below repeats the one made
            // before the loop; it is loop-invariant and could be dropped.
    749     assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    750     assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    751     // Issue sequence of non-sp increment and pi sp spills for cs regs. The
    752     // first spill is a pre-increment that allocates the stack.
    753     // For example:
    754     //    stp     x22, x21, [sp, #-48]!   // addImm(-6)
    755     //    stp     x20, x19, [sp, #16]    // addImm(+2)
    756     //    stp     fp, lr, [sp, #32]      // addImm(+4)
    757     // Rationale: This sequence saves uop updates compared to a sequence of
    758     // pre-increment spills like stp xi,xj,[sp,#-16]!
    759     // Note: Similar rationale and sequence for restores in epilog.
    760     if (AArch64::GPR64RegClass.contains(Reg1)) {
    761       assert(AArch64::GPR64RegClass.contains(Reg2) &&
    762              "Expected GPR64 callee-saved register pair!");
    763       // For first spill use pre-increment store.
    764       if (i == 0)
    765         StrOpc = AArch64::STPXpre;
    766       else
    767         StrOpc = AArch64::STPXi;
    768     } else if (AArch64::FPR64RegClass.contains(Reg1)) {
    769       assert(AArch64::FPR64RegClass.contains(Reg2) &&
    770              "Expected FPR64 callee-saved register pair!");
    771       // For first spill use pre-increment store.
    772       if (i == 0)
    773         StrOpc = AArch64::STPDpre;
    774       else
    775         StrOpc = AArch64::STPDi;
    776     } else
    777       llvm_unreachable("Unexpected callee saved register!");
    778     DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", "
    779                  << TRI->getName(Reg2) << ") -> fi#(" << CSI[idx].getFrameIdx()
    780                  << ", " << CSI[idx + 1].getFrameIdx() << ")\n");
    781     // Compute offset: i = 0 => offset = -Count;
    782     //                 i = 2 => offset = -(Count - 2) + Count = 2 = i; etc.
            // NOTE(review): Count is unsigned, so -Count is an unsigned negation that
            // only becomes negative through the conversion to int; a signed cast
            // before negating would make the intent explicit.
    783     const int Offset = (i == 0) ? -Count : i;
    784     assert((Offset >= -64 && Offset <= 63) &&
    785            "Offset out of bounds for STP immediate");
    786     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
            // The pre-indexed forms write SP back, so it is added as a def first.
    787     if (StrOpc == AArch64::STPDpre || StrOpc == AArch64::STPXpre)
    788       MIB.addReg(AArch64::SP, RegState::Define);
    789 
    790     MBB.addLiveIn(Reg1);
    791     MBB.addLiveIn(Reg2);
            // Operands are added as Reg2, Reg1: the second register of the CSI pair
            // comes first in the instruction.
    792     MIB.addReg(Reg2, getPrologueDeath(MF, Reg2))
    793         .addReg(Reg1, getPrologueDeath(MF, Reg1))
    794         .addReg(AArch64::SP)
    795         .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit
    796         .setMIFlag(MachineInstr::FrameSetup);
    797   }
    798   return true;
    799 }
    800 
    /// restoreCalleeSavedRegisters - Emit the epilogue load-pair instructions that
    /// reload the callee-saved registers listed in \p CSI.
    ///
    /// The registers are processed two at a time, walking \p CSI from its first
    /// pair to its last. All but the final load are plain LDPs at a positive slot
    /// offset from SP; the final load is a post-indexed LDP that also adjusts SP
    /// by +Count slots. Unlike the spill code, no MachineInstr flag is set on the
    /// emitted instructions.
    ///
    /// \param MBB  Block receiving the restore code.
    /// \param MI   Insertion point; new instructions are inserted before it, and
    ///             its debug location is reused when it is not MBB.end().
    /// \param CSI  Callee-saved register list; must have an even number of
    ///             entries with consecutive frame indices within each pair.
    /// \param TRI  Register info, used here only for debug output.
    /// \returns Always true.
    801 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    802     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    803     const std::vector<CalleeSavedInfo> &CSI,
    804     const TargetRegisterInfo *TRI) const {
    805   MachineFunction &MF = *MBB.getParent();
    806   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
    807   unsigned Count = CSI.size();
    808   DebugLoc DL;
          // NOTE(review): the assert messages in this function say "spill" although
          // this is the restore path; they appear to be copied from
          // spillCalleeSavedRegisters().
    809   assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    810 
    811   if (MI != MBB.end())
    812     DL = MI->getDebugLoc();
    813 
    814   for (unsigned i = 0; i < Count; i += 2) {
    815     unsigned Reg1 = CSI[i].getReg();
    816     unsigned Reg2 = CSI[i + 1].getReg();
    817     // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    818     // list to come in sorted by frame index so that we can issue the load
    819     // pair instructions directly. Assert if we see anything otherwise.
    820     assert(CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx() &&
    821            "Out of order callee saved regs!");
    822     // Issue sequence of non-sp increment and sp-pi restores for cs regs. Only
    823     // the last load is sp-pi post-increment and de-allocates the stack:
    824     // For example:
    825     //    ldp     fp, lr, [sp, #32]       // addImm(+4)
    826     //    ldp     x20, x19, [sp, #16]     // addImm(+2)
    827     //    ldp     x22, x21, [sp], #48     // addImm(+6)
    828     // Note: see comment in spillCalleeSavedRegisters()
    829     unsigned LdrOpc;
    830 
            // NOTE(review): the Count parity assert below repeats the one made
            // before the loop; it is loop-invariant and could be dropped.
    831     assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!");
    832     assert((i & 1) == 0 && "Odd index for callee-saved reg spill!");
    833     if (AArch64::GPR64RegClass.contains(Reg1)) {
    834       assert(AArch64::GPR64RegClass.contains(Reg2) &&
    835              "Expected GPR64 callee-saved register pair!");
              // The last pair uses the post-indexed form so SP is restored too.
    836       if (i == Count - 2)
    837         LdrOpc = AArch64::LDPXpost;
    838       else
    839         LdrOpc = AArch64::LDPXi;
    840     } else if (AArch64::FPR64RegClass.contains(Reg1)) {
    841       assert(AArch64::FPR64RegClass.contains(Reg2) &&
    842              "Expected FPR64 callee-saved register pair!");
    843       if (i == Count - 2)
    844         LdrOpc = AArch64::LDPDpost;
    845       else
    846         LdrOpc = AArch64::LDPDi;
    847     } else
    848       llvm_unreachable("Unexpected callee saved register!");
    849     DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", "
    850                  << TRI->getName(Reg2) << ") -> fi#(" << CSI[i].getFrameIdx()
    851                  << ", " << CSI[i + 1].getFrameIdx() << ")\n");
    852 
    853     // Compute offset: i = 0 => offset = Count - 2; i = 2 => offset = Count - 4;
    854     // etc.
            // For the final (post-indexed) load the immediate is instead the full
            // Count, i.e. the amount added to SP after the load.
    855     const int Offset = (i == Count - 2) ? Count : Count - i - 2;
    856     assert((Offset >= -64 && Offset <= 63) &&
    857            "Offset out of bounds for LDP immediate");
    858     MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
            // The post-indexed forms write SP back, so it is added as a def first.
    859     if (LdrOpc == AArch64::LDPXpost || LdrOpc == AArch64::LDPDpost)
    860       MIB.addReg(AArch64::SP, RegState::Define);
    861 
    862     MIB.addReg(Reg2, getDefRegState(true))
    863         .addReg(Reg1, getDefRegState(true))
    864         .addReg(AArch64::SP)
    865         .addImm(Offset); // [sp], #offset * 8  or [sp, #offset * 8]
    866                          // where the factor * 8 is implicit
    867   }
    868   return true;
    869 }
    870 
    /// determineCalleeSaves - Decide which callee-saved registers this function
    /// must spill and whether it needs a stack frame.
    ///
    /// After the generic TargetFrameLowering computation, this:
    ///   - forces FP and LR to be saved when the function has a frame pointer,
    ///     the base register when a base pointer is used, and X9 when the stack
    ///     must be realigned without a base pointer;
    ///   - walks the callee-saved list in pairs and, when only one register of
    ///     a pair is used, marks its partner as saved too so that spills always
    ///     come in pairs;
    ///   - marks the function as having a stack frame when any pair is spilled,
    ///     the estimated frame is at least 256 bytes, or the register info says
    ///     the frame cannot be eliminated;
    ///   - for such "big" frames, makes sure the register scavenger has either
    ///     a spare spilled GPR or an emergency spill slot available.
    ///
    /// Functions using the GHC calling convention are left untouched.
    ///
    /// \param MF         Function being analysed.
    /// \param SavedRegs  In/out bit vector of registers to save, indexed by
    ///                   physical register number.
    /// \param RS         Register scavenger; receives the emergency spill slot
    ///                   frame index when one has to be created.
    871 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
    872                                                 BitVector &SavedRegs,
    873                                                 RegScavenger *RS) const {
    874   // All calls are tail calls in GHC calling conv, and functions have no
    875   // prologue/epilogue.
    876   if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    877     return;
    878 
    879   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
    880   const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
    881       MF.getSubtarget().getRegisterInfo());
    882   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
          // Registers of pairs that turn out to be entirely unused; the GPR list is
          // the pool drawn from below when an extra scratch register is needed.
          // UnspilledCSFPRs is filled but not read again within this function.
    883   SmallVector<unsigned, 4> UnspilledCSGPRs;
    884   SmallVector<unsigned, 4> UnspilledCSFPRs;
    885 
    886   // The frame record needs to be created by saving the appropriate registers
    887   if (hasFP(MF)) {
    888     SavedRegs.set(AArch64::FP);
    889     SavedRegs.set(AArch64::LR);
    890   }
    891 
    892   // Spill the BasePtr if it's used. Do this first thing so that the
    893   // getCalleeSavedRegs() below will get the right answer.
    894   if (RegInfo->hasBasePointer(MF))
    895     SavedRegs.set(RegInfo->getBaseRegister());
    896 
          // NOTE(review): X9 is presumably the scratch register used by the
          // prologue's stack realignment sequence -- confirm against emitPrologue().
    897   if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
    898     SavedRegs.set(AArch64::X9);
    899 
    900   // If any callee-saved registers are used, the frame cannot be eliminated.
    901   unsigned NumGPRSpilled = 0;
    902   unsigned NumFPRSpilled = 0;
    903   bool ExtraCSSpill = false;
    904   bool CanEliminateFrame = true;
    905   DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:");
    906   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
    907 
    908   // Check pairs of consecutive callee-saved registers.
    909   for (unsigned i = 0; CSRegs[i]; i += 2) {
    910     assert(CSRegs[i + 1] && "Odd number of callee-saved registers!");
    911 
            // "Odd"/"Even" name the first and second register of the pair as listed
            // in CSRegs; they do not refer to the parity of the list index.
    912     const unsigned OddReg = CSRegs[i];
    913     const unsigned EvenReg = CSRegs[i + 1];
            // Exactly one of "both GPR64" / "both FPR64" must hold for the pair.
    914     assert((AArch64::GPR64RegClass.contains(OddReg) &&
    915             AArch64::GPR64RegClass.contains(EvenReg)) ^
    916                (AArch64::FPR64RegClass.contains(OddReg) &&
    917                 AArch64::FPR64RegClass.contains(EvenReg)) &&
    918            "Register class mismatch!");
    919 
    920     const bool OddRegUsed = SavedRegs.test(OddReg);
    921     const bool EvenRegUsed = SavedRegs.test(EvenReg);
    922 
    923     // Early exit if none of the registers in the register pair is actually
    924     // used.
    925     if (!OddRegUsed && !EvenRegUsed) {
    926       if (AArch64::GPR64RegClass.contains(OddReg)) {
    927         UnspilledCSGPRs.push_back(OddReg);
    928         UnspilledCSGPRs.push_back(EvenReg);
    929       } else {
    930         UnspilledCSFPRs.push_back(OddReg);
    931         UnspilledCSFPRs.push_back(EvenReg);
    932       }
    933       continue;
    934     }
    935 
            // Reg stays NoRegister unless this pair needs a partner spilled purely
            // to keep the pair complete.
    936     unsigned Reg = AArch64::NoRegister;
    937     // If only one of the registers of the register pair is used, make sure to
    938     // mark the other one as used as well.
    939     if (OddRegUsed ^ EvenRegUsed) {
    940       // Find out which register is the additional spill.
    941       Reg = OddRegUsed ? EvenReg : OddReg;
    942       SavedRegs.set(Reg);
    943     }
    944 
    945     DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo));
    946     DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo));
    947 
    948     assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) ||
    949             (RegInfo->getEncodingValue(OddReg) + 1 ==
    950              RegInfo->getEncodingValue(EvenReg))) &&
    951            "Register pair of non-adjacent registers!");
    952     if (AArch64::GPR64RegClass.contains(OddReg)) {
    953       NumGPRSpilled += 2;
    954       // If it's not a reserved register, we can use it in lieu of an
    955       // emergency spill slot for the register scavenger.
    956       // FIXME: It would be better to instead keep looking and choose another
    957       // unspilled register that isn't reserved, if there is one.
    958       if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg))
    959         ExtraCSSpill = true;
    960     } else
    961       NumFPRSpilled += 2;
    962 
    963     CanEliminateFrame = false;
    964   }
    965 
    966   // FIXME: Set BigStack if any stack slot references may be out of range.
    967   // For now, just conservatively guesstimate based on unscaled indexing
    968   // range. We'll end up allocating an unnecessary spill slot a lot, but
    969   // realistically that's not a big deal at this stage of the game.
    970   // The CSR spill slots have not been allocated yet, so estimateStackSize
    971   // won't include them.
    972   MachineFrameInfo *MFI = MF.getFrameInfo();
          // Each spilled register occupies 8 bytes on top of the estimated locals.
    973   unsigned CFSize =
    974       MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
    975   DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
    976   bool BigStack = (CFSize >= 256);
    977   if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    978     AFI->setHasStackFrame(true);
    979 
    980   // Estimate if we might need to scavenge a register at some point in order
    981   // to materialize a stack offset. If so, either spill one additional
    982   // callee-saved register or reserve a special spill slot to facilitate
    983   // register scavenging. If we already spilled an extra callee-saved register
    984   // above to keep the number of spills even, we don't need to do anything else
    985   // here.
    986   if (BigStack && !ExtraCSSpill) {
    987 
    988     // If we're adding a register to spill here, we have to add two of them
    989     // to keep the number of regs to spill even.
    990     assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!");
    991     unsigned Count = 0;
            // Take up to two registers from the back of the unspilled GPR list. These
            // are not checked with isReservedReg(), unlike the partner-spill case in
            // the pair loop.
    992     while (!UnspilledCSGPRs.empty() && Count < 2) {
    993       unsigned Reg = UnspilledCSGPRs.back();
    994       UnspilledCSGPRs.pop_back();
    995       DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo)
    996                    << " to get a scratch register.\n");
    997       SavedRegs.set(Reg);
    998       ExtraCSSpill = true;
    999       ++Count;
   1000     }
   1001 
   1002     // If we didn't find an extra callee-saved register to spill, create
   1003     // an emergency spill slot.
   1004     if (!ExtraCSSpill) {
   1005       const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
   1006       int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
              // NOTE(review): RS is dereferenced without a null check; this assumes
              // callers always pass a scavenger for this target -- confirm.
   1007       RS->addScavengingFrameIndex(FI);
   1008       DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
   1009                    << " as the emergency spill slot.\n");
   1010     }
   1011   }
   1012 }
   1013