//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of the TargetFrameLowering
// class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e.
// it's possible to create a function such that a particular area isn't
// present in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until in the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_fp, prev_lr                  |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
// | other callee-saved registers      |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access the data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) to that data must be computable at compile time. The size of
// the areas with a dotted background cannot be computed at compile time if
// they are present, so all three of fp, bp and sp may need to be set up to
// be able to access all contents in the frame areas, assuming all of the
// frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
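//
// For instance, a hypothetical function combining both cases:
//
//   void f(int n) {
//     alignas(32) char buf[32]; // over-aligned local   -> needs fp
//     int vla[n];               // variable-sized (VLA) -> needs bp
//   }
//
// would end up with fp, bp and sp all set up in its prologue.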
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;
  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
    return false;

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned NumBytes = AFI->getLocalStackSize();

  return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128);
}

/// hasFP - Return true if the specified function should have a dedicated
/// frame pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  // Retain behavior of always omitting the FP for leaf functions when
  // possible.
  return (MFI->hasCalls() &&
          MF.getTarget().Options.DisableFramePointerElim(MF)) ||
         MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
         MFI->hasStackMap() || MFI->hasPatchPoint() ||
         RegInfo->needsStackRealignment(MF);
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too
    // so this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  return MBB.erase(I);
}

void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    int64_t Offset =
        MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MMI.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving
// at least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register.
  if (&MF->front() == MBB)
    return AArch64::X9;

  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
  LivePhysRegs LiveRegs(&TRI);
  LiveRegs.addLiveIns(*MBB);

  // Mark callee saved registers as used so we will not choose them.
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  // Prefer X9 since it was historically used for the prologue scratch reg.
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
}

bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Don't need a scratch register if we're not going to re-align the stack.
  if (!RegInfo->needsStackRealignment(*MF))
    return true;
  // Otherwise, we can use any block as long as it has a scratch register
  // available.
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512)
    return false;

  if (MFI->hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  return true;
}

// Convert a callee-save register save/restore instruction to do a stack
// pointer decrement/increment that allocates/deallocates the callee-save
// stack area, by converting the store/load to use a pre/post-increment
// version.
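//
// For example (a sketch; the exact registers depend on the function), with a
// 48-byte callee-save area the first save in the prologue is rewritten as
//    stp x22, x21, [sp, #0]    ->    stp x22, x21, [sp, #-48]!
// and the last restore in the epilogue as
//    ldp x22, x21, [sp, #0]    ->    ldp x22, x21, [sp], #48
// so no separate SP adjustment instruction is needed.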
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {

  unsigned NewOpc;
  bool NewIsUnscaled = false;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    NewIsUnscaled = true;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    NewIsUnscaled = true;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    NewIsUnscaled = true;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    NewIsUnscaled = true;
    break;
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.addOperand(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  assert(CSStackSizeInc % 8 == 0);
  int64_t CSStackSizeIncImm = CSStackSizeInc;
  if (!NewIsUnscaled)
    CSStackSizeIncImm /= 8;
  MIB.addImm(CSStackSizeIncImm);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());

  return std::prev(MBB.erase(MBBI));
}

// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize) {
  unsigned Opc = MI.getOpcode();
  (void)Opc;
  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
          Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
         "Unexpected callee-save save/restore opcode!");

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
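  // For example (a sketch, assuming LocalStackSize == 64): the save
  //    stp x20, x19, [sp, #16]    // scaled imm = 2
  // becomes
  //    stp x20, x19, [sp, #80]    // scaled imm = 2 + 64/8 = 10
  // because the combined SP bump also allocated the local area below the
  // callee saves.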
  assert(LocalStackSize % 8 == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI->getStackSize();
  if (!AFI->hasStackFrame()) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF))
      ++NumRedZoneFunctions;
    else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  auto CSStackSize = AFI->getCalleeSavedStackSize();
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - CSStackSize);

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);
    NumBytes = 0;
  } else if (CSStackSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                     -CSStackSize);
    NumBytes -= CSStackSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
    ++MBBI;
  }
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp = sp - 16.
    int FPOffset = CSStackSize - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue    sub fp, sp, FPOffset
    //       or mov fp, sp    when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI->getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is a temporary register, so it shouldn't contain any live
      //      data here and is free to use. This is already produced by
      //      emitFrameOffset above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of
  // the stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    // Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    // Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xA, xB, [sp, #-offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //         +-------------------------------------------+
    //   10000 | ........ | ........ | ........ | ........ |
    //   10004 | ........ | ........ | ........ | ........ |
    //         +-------------------------------------------+
    //   10008 | ........ | ........ | ........ | ........ |
    //   1000c | ........ | ........ | ........ | ........ |
    //         +===========================================+
    //   10010 |                X28 Register               |
    //   10014 |                X28 Register               |
    //         +-------------------------------------------+
    //   10018 |                X27 Register               |
    //   1001c |                X27 Register               |
    //         +===========================================+
    //   10020 |               Frame Pointer               |
    //   10024 |               Frame Pointer               |
    //         +-------------------------------------------+
    //   10028 |               Link Register               |
    //   1002c |               Link Register               |
    //         +===========================================+
    //   10030 | ........ | ........ | ........ | ........ |
    //   10034 | ........ | ........ | ........ | ........ |
    //         +-------------------------------------------+
    //   10038 | ........ | ........ | ........ | ........ |
    //   1003c | ........ | ........ | ........ | ........ |
    //         +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset
    // of '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29,
    // -24 for w27, and -32 for w28:
    //
    // Ltmp1:
    //     .cfi_def_cfa w29, 16
    // Ltmp2:
    //     .cfi_offset w30, -8
    // Ltmp3:
    //     .cfi_offset w29, -16
    // Ltmp4:
    //     .cfi_offset w27, -24
    // Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MMI.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including
    // FP, LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                       RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI->getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  auto CSStackSize = AFI->getCalleeSavedStackSize();
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);

  if (!CombineSPBump && CSStackSize != 0)
    convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, std::prev(MBB.getFirstTerminator()), DL, TII, CSStackSize);

  // Move past the restores of the callee-saved registers.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
  }

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP,
                    AArch64::SP, NumBytes + ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
    return;
  }

  NumBytes -= CSStackSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && ArgumentPopSize == 0)
      return;

    bool NoCalleeSaveRestore = CSStackSize == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += ArgumentPopSize;
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    if (NoCalleeSaveRestore || ArgumentPopSize == 0)
      return;
    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -CSStackSize + 16, TII, MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save
  // save code in the prologue.
  if (ArgumentPopSize)
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP,
                    AArch64::SP, ArgumentPopSize, TII,
                    MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI,
                                                     unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  int FPOffset = MFI->getObjectOffset(FI) + 16;
  int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
  bool isFixed = MFI->isFixedObjectIndex(FI);

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
               !RegInfo->needsStackRealignment(MF)) {
      // Use SP or FP, whichever gives us the best chance of the offset
      // being in range for direct access. If the FPOffset is positive,
      // that'll always be best, as the SP will be even further away.
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If we have variable sized objects, we're stuck with
      // using the FP regardless, though, as the SP offset is unknown
      // and we don't have a base pointer available. If an offset is
      // available via the FP and the SP, use whichever is closest.
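      //
      // For example (hypothetical numbers): with FPOffset == -200 and
      // Offset == 300, the FP is chosen, since -200 is within the signed
      // 9-bit unscaled range and closer than 300; with FPOffset == -300,
      // the condition below fails and the SP-relative Offset is used.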
      if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
          (FPOffset >= -256 && Offset > -FPOffset))
        UseFP = true;
    }
  }

  assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument objects cannot be accessed through the frame pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm.returnaddress intrinsic and with arguments passed
  // in callee saved registers.
  // Omitting the kill flags is conservatively correct even if the live-in
  // is not used after all.
  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
  return getKillRegState(!IsLiveIn);
}

static bool produceCompactUnwindFrame(MachineFunction &MF) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  AttributeSet Attrs = MF.getFunction()->getAttributes();
  return Subtarget.isTargetMachO() &&
         !(Subtarget.getTargetLowering()->supportSwiftError() &&
           Attrs.hasAttrSomewhere(Attribute::SwiftError));
}

struct RegPairInfo {
  RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
  unsigned Reg1;
  unsigned Reg2;
  int FrameIdx;
  int Offset;
  bool IsGPR;
  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};

static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  unsigned Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI->setObjectAlignment(RPI.FrameIdx, 16);
      AFI->setCalleeSaveStackHasFreeSpace(true);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }
}

bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    unsigned StrOpc;

    // Issue sequence of spills for cs regs. The first spill may be converted
    // to a pre-decrement store later by emitPrologue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    stp x22, x21, [sp, #0]     // addImm(+0)
    //    stp x20, x19, [sp, #16]    // addImm(+2)
    //    stp fp, lr, [sp, #32]      // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    if (RPI.IsGPR)
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    else
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1);
          if (RPI.isPaired())
            dbgs() << ", " << TRI->getName(Reg2);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx + 1;
          dbgs() << ")\n");

    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    MBB.addLiveIn(Reg1);
    if (RPI.isPaired()) {
      MBB.addLiveIn(Reg2);
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOStore, 8, 8));
    }
    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOStore, 8, 8));
  }
  return true;
}

bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs);

  for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs. The last restore may be
    // converted to a post-increment load later by emitEpilogue if the
    // callee-save stack area allocation can't be combined with the local
    // stack area allocation.
    // For example:
    //    ldp fp, lr, [sp, #32]      // addImm(+4)
    //    ldp x20, x19, [sp, #16]    // addImm(+2)
    //    ldp x22, x21, [sp, #0]     // addImm(+0)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    if (RPI.IsGPR)
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    else
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1);
          if (RPI.isPaired())
            dbgs() << ", " << TRI->getName(Reg2);
          dbgs() << ") -> fi#(" << RPI.FrameIdx;
          if (RPI.isPaired())
            dbgs() << ", " << RPI.FrameIdx + 1;
          dbgs() << ")\n");

    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (RPI.isPaired()) {
      MIB.addReg(Reg2, getDefRegState(true));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOLoad, 8, 8));
    }
    MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOLoad, 8, 8));
  }
  return true;
}

void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  // The frame record needs to be created by saving the appropriate registers.
  if (hasFP(MF)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  unsigned BasePointerReg = AArch64::NoRegister;
  if (RegInfo->hasBasePointer(MF))
    BasePointerReg = RegInfo->getBaseRegister();

  bool ExtraCSSpill = false;
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    // Add the base pointer register to SavedRegs if it is callee-save.
    if (Reg == BasePointerReg)
      SavedRegs.set(Reg);

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = CSRegs[i ^ 1];
    if (!RegUsed) {
      if (AArch64::GPR64RegClass.contains(Reg) &&
          !RegInfo->isReservedReg(MF, Reg)) {
        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;
      }
      continue;
    }

    // MachO's compact unwind format relies on all registers being stored in
    // pairs.
    // FIXME: the usual format is actually better if unwinding isn't needed.
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
          !RegInfo->isReservedReg(MF, PairedReg))
        ExtraCSSpill = true;
    }
  }

  DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
        for (int Reg = SavedRegs.find_first(); Reg != -1;
             Reg = SavedRegs.find_next(Reg))
          dbgs() << ' ' << PrintReg(Reg, RegInfo);
        dbgs() << "\n";);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumRegsSpilled = SavedRegs.count();
  bool CanEliminateFrame = NumRegsSpilled == 0;

  // FIXME: Set BigStack if any stack slot references may be out of range.
  // For now, just conservatively guesstimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
  DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  bool BigStack = (CFSize >= 256);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved
  // register above to keep the number of spills even, we don't need to do
  // anything else here.
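  // E.g. (hypothetical numbers): a function with an estimated 300-byte frame
  // and two spilled CSRs gives CFSize = 300 + 16 = 316 >= 256, so BigStack is
  // set and we guarantee a scratch register (or an emergency spill slot) for
  // materializing large offsets.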
  if (BigStack && !ExtraCSSpill) {
    if (UnspilledCSGPR != AArch64::NoRegister) {
      DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo)
                   << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      // MachO's compact unwind format relies on all registers being stored in
      // pairs, so if we need to spill one extra for BigStack, then we need to
      // store the pair.
      if (produceCompactUnwindFrame(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      ExtraCSSpill = true;
      NumRegsSpilled = SavedRegs.count();
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill) {
      const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
      int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
      RS->addScavengingFrameIndex(FI);
      DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                   << " as the emergency spill slot.\n");
    }
  }

  // Round up to register pair alignment to avoid additional SP adjustment
  // instructions.
  AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}

bool AArch64FrameLowering::enableStackSlotScavenging(
    const MachineFunction &MF) const {
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  return AFI->hasCalleeSaveStackFreeSpace();
}