//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until in the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | (Win64 only) varargs from reg     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_fp, prev_lr                  |
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
// | other callee-saved registers      |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access data in a frame, a constant offset from one of the pointers (fp,
// bp, sp) must be computable at compile time. The size of the areas with a
// dotted background cannot be computed at compile time if they are present,
// so all three of fp, bp and sp must be set up to be able to access all
// contents in the frame areas, assuming all of the frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
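//
// For example (an illustrative sketch, not taken from this file), a function
// that combines an over-aligned local with a VLA needs all three pointers:
//
//   void f(int n) {
//     _Alignas(64) char buf[64]; // over-aligned -> realignment -> needs fp
//     int vla[n];                // variable-sized local        -> needs bp
//     ...                        // fixed-size locals are sp/bp-relative
//   }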
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//

#include "AArch64FrameLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "frame-info"

static cl::opt<bool> EnableRedZone("aarch64-redzone",
                                   cl::desc("enable use of redzone on AArch64"),
                                   cl::init(false), cl::Hidden);

static cl::opt<bool>
    ReverseCSRRestoreSeq("reverse-csr-restore-seq",
                         cl::desc("reverse the CSR restore sequence"),
                         cl::init(false), cl::Hidden);

STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");

/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exceptions here are vector stores/loads, which cannot encode
/// any displacements (see estimateRSStackSizeLimit(),
/// isAArch64FrameOffsetLegal()).
static const unsigned DefaultSafeSPDisplacement = 255;

/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
  // FIXME: For now, just conservatively guesstimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
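  // (The unscaled addressing forms, LDUR/STUR, take a signed 9-bit byte
  // offset, i.e. [-256, 255]; DefaultSafeSPDisplacement above reflects the
  // positive end of that range.)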
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr() || MI.isPseudo() ||
          MI.getOpcode() == AArch64::ADDXri ||
          MI.getOpcode() == AArch64::ADDSXri)
        continue;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isFI())
          continue;

        int Offset = 0;
        if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
            AArch64FrameOffsetCannotUpdate)
          return 0;
      }
    }
  }
  return DefaultSafeSPDisplacement;
}

bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;
  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned NumBytes = AFI->getLocalStackSize();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  // Retain behavior of always omitting the FP for leaf functions when
  // possible.
  if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;
  if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
      MFI.hasStackMap() || MFI.hasPatchPoint() ||
      RegInfo->needsStackRealignment(MF))
    return true;
  // With large callframes around we may need to use FP to access the
  // scavenging emergency spillslot.
  //
  // Unfortunately some calls to hasFP() like machine verifier ->
  // getReservedReg() -> hasFP in the middle of global isel are too early
  // to know the max call frame size. Hopefully conservatively returning "true"
  // in those cases is fine.
  // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
  if (!MFI.isMaxCallFrameSizeComputed() ||
      MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
    return true;

  return false;
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects();
}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
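
  // Illustrative example (a sketch, not verbatim compiler output): when the
  // call frame is not reserved, the pseudos bracketing a call that needs 32
  // bytes of outgoing argument space lower to explicit SP adjustments:
  //   ADJCALLSTACKDOWN 32, 0   ->   sub sp, sp, #32
  //   ... the call ...
  //   ADJCALLSTACKUP 32, 0     ->   add sp, sp, #32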

  const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
  if (!TFI->hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offsets <= 12-bit, we use LSL #0.
      // 2) For 12-bit <= offsets <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, Amount, TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, -CalleePopAmount,
                    TII);
  }
  return MBB.erase(I);
}

void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const MCRegisterInfo *MRI = STI.getRegisterInfo();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    int64_t Offset =
        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register.
  if (&MF->front() == MBB)
    return AArch64::X9;

  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);
  LiveRegs.addLiveIns(*MBB);

  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(MF);
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  // Prefer X9 since it was historically used for the prologue scratch reg.
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
}

bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Don't need a scratch register if we're not going to re-align the stack.
  if (!RegInfo->needsStackRealignment(*MF))
    return true;
  // Otherwise, we can use any block as long as it has a scratch register
  // available.
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

static bool windowsRequiresStackProbe(MachineFunction &MF,
                                      unsigned StackSizeInBytes) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (!Subtarget.isTargetWindows())
    return false;
  const Function &F = MF.getFunction();
  // TODO: When implementing stack protectors, take that into account
  // for the probe threshold.
  unsigned StackProbeSize = 4096;
  if (F.hasFnAttribute("stack-probe-size"))
    F.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores.
  if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  return true;
}

// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc) {
  // Ignore instructions that do not operate on SP, i.e. shadow call stack
  // instructions.
  while (MBBI->getOpcode() == AArch64::STRXpost ||
         MBBI->getOpcode() == AArch64::LDRXpre) {
    assert(MBBI->getOperand(0).getReg() != AArch64::SP);
    ++MBBI;
  }

  unsigned NewOpc;
  bool NewIsUnscaled = false;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    NewIsUnscaled = true;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    NewIsUnscaled = true;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    NewIsUnscaled = true;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    NewIsUnscaled = true;
    break;
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  assert(CSStackSizeInc % 8 == 0);
  int64_t CSStackSizeIncImm = CSStackSizeInc;
  if (!NewIsUnscaled)
    CSStackSizeIncImm /= 8;
  MIB.addImm(CSStackSizeIncImm);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands_begin(), MBBI->memoperands_end());

  return std::prev(MBB.erase(MBBI));
}
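
// For example (an illustrative sketch), with CSStackSizeInc == -16 the
// conversion above rewrites
//   stp x29, x30, [sp, #0]      // STPXi, scaled offset
// into the pre-decrement form
//   stp x29, x30, [sp, #-16]!   // STPXpre, allocating the CSR area
// and with CSStackSizeInc == +16 an LDPXi restore becomes the post-increment
//   ldp x29, x30, [sp], #16     // LDPXpost, deallocating it.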

// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize) {
  unsigned Opc = MI.getOpcode();

  // Ignore instructions that do not operate on SP, i.e. shadow call stack
  // instructions.
  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre) {
    assert(MI.getOperand(0).getReg() != AArch64::SP);
    return;
  }

  (void)Opc;
  assert((Opc == AArch64::STPXi || Opc == AArch64::STPDi ||
          Opc == AArch64::STRXui || Opc == AArch64::STRDui ||
          Opc == AArch64::LDPXi || Opc == AArch64::LDPDi ||
          Opc == AArch64::LDRXui || Opc == AArch64::LDRDui) &&
         "Unexpected callee-save save/restore opcode!");

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % 8 == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / 8);
}

static void adaptForLdStOpt(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator FirstSPPopI,
                            MachineBasicBlock::iterator LastPopI) {
  // Sometimes (when we restore in the same order as we save), we can end up
  // with code like this:
  //
  //   ldp      x26, x25, [sp]
  //   ldp      x24, x23, [sp, #16]
  //   ldp      x22, x21, [sp, #32]
  //   ldp      x20, x19, [sp, #48]
  //   add      sp, sp, #64
  //
  // In this case, it is always better to put the first ldp at the end, so
  // that the load-store optimizer can run and merge the ldp and the add into
  // a post-index ldp.
  // If we managed to grab the first pop instruction, move it to the end.
  if (ReverseCSRRestoreSeq)
    MBB.splice(FirstSPPopI, &MBB, LastPopI);
  // We should end up with something like this now:
  //
  //   ldp      x24, x23, [sp, #16]
  //   ldp      x22, x21, [sp, #32]
  //   ldp      x20, x19, [sp, #48]
  //   ldp      x26, x25, [sp]
  //   add      sp, sp, #64
  //
  // and the load-store optimizer can merge the last two instructions into:
  //
  //   ldp      x26, x25, [sp], #64
  //
}

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() || F.needsUnwindTableEntry();
  bool HasFP = hasFP(MF);

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  int NumBytes = (int)MFI.getStackSize();
  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
    assert(!HasFP && "unexpected function without stack frame but with FP");

    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);

    if (!NumBytes)
      return;
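    // (The "red zone" is the up-to-128-byte area immediately below SP that,
    // on targets whose ABI permits it, a leaf function may use for its locals
    // without ever moving SP. In this backend it is off by default and only
    // enabled via the -aarch64-redzone flag; see EnableRedZone above.)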
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF)) {
      AFI->setHasRedZone(true);
      ++NumRedZoneFunctions;
    } else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

      // Label used to tie together the PROLOG_LABEL and the MachineMoves.
      MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }
    return;
  }

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);

  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
                    MachineInstr::FrameSetup);
    NumBytes = 0;
  } else if (PrologueSaveSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(MBB, MBBI, DL, TII,
                                                     -PrologueSaveSize);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize());
    ++MBBI;
  }
  if (HasFP) {
    // Only set up FP if we actually need to. Frame pointer is fp =
    // sp - fixedobject - 16.
    int FPOffset = AFI->getCalleeSavedStackSize() - 16;
    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue "add fp, sp, #FPOffset" (or "mov fp, sp" when FPOffset is zero).
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
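    // For example (illustrative, assuming no combined SP bump): with a
    // 32-byte callee-save area holding x19, x20 and the frame record,
    // FPOffset is 16 and this emits
    //   add x29, sp, #16
    // leaving x29 pointing at the frame record (prev_fp, prev_lr).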
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP, FPOffset, TII,
                    MachineInstr::FrameSetup);
  }

  if (windowsRequiresStackProbe(MF, NumBytes)) {
    uint32_t NumWords = NumBytes >> 4;

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
        .addImm(NumWords)
        .setMIFlags(MachineInstr::FrameSetup);

    switch (MF.getTarget().getCodeModel()) {
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
          .addExternalSymbol("__chkstk")
          .addReg(AArch64::X15, RegState::Implicit)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
          .addReg(AArch64::X16, RegState::Define)
          .addExternalSymbol("__chkstk")
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
          .addReg(AArch64::X16, RegState::Kill)
          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
          .setMIFlags(MachineInstr::FrameSetup);
      break;
    }

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP, RegState::Kill)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
        .setMIFlags(MachineInstr::FrameSetup);
    NumBytes = 0;
  }

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
                      MachineInstr::FrameSetup);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data
      //      here, free to use. This is already produced by emitFrameOffset
      //      above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
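      //
      // Worked example (illustrative): for Alignment == 32, NrBitsToZero is
      // 5, so N = 1, immr = 59, imms = 58, which decodes to a run of 59 ones
      // rotated into bits [63:5], i.e. the mask 0xFFFFFFFFFFFFFFE0 that
      // clears the low 5 bits of X9.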
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be
  // allocated after this, so we can still use the base pointer to reference
  // locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = -TD.getPointerSize(0);
    unsigned FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    //  Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    //  Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |                Frame Pointer              |
    // 10024 |                Frame Pointer              |
    //       +-------------------------------------------+
    // 10028 |                Link Register              |
    // 1002c |                Link Register              |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp  fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov fp, sp
    //     [sp] == 10020       ::  stp  x28, x27, [sp, #-16]!
    //     sp == 10010         ::    >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, Reg, 2 * StackGrowth - FixedObject));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  DebugLoc DL;
  bool IsTailCallReturn = false;
  if (MBB.end() != MBBI) {
    DL = MBBI->getDebugLoc();
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
                       RetOpcode == AArch64::TCRETURNri;
  }
  int NumBytes = MFI.getStackSize();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Initial and residual are named for consistency with the prologue. Note
  // that in the epilogue, the residual adjustment is executed first.
  uint64_t ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments; this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  // The stack frame should be like below,
  //
  //      ----------------------                     ---
  //      |                    |                      |
  //      | BytesInStackArgArea|              CalleeArgStackSize
  //      | (NumReusableBytes) |                (of tail call)
  //      |                    |                     ---
  //      |                    |                      |
  //      ---------------------|        ---           |
  //      |                    |         |            |
  //      |   CalleeSavedReg   |         |            |
  //      | (CalleeSavedStackSize)|      |            |
  //      |                    |         |            |
  //      ---------------------|         |         NumBytes
  //      |                    |     StackSize  (StackAdjustUp)
  //      |   LocalStackSize   |         |            |
  //      | (covering callee   |         |            |
  //      |       args)        |         |            |
  //      |                    |         |            |
  //      ----------------------        ---          ---
  //
  // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
  //             = StackSize + ArgumentPopSize
  //
  // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
  // it as the 2nd argument of AArch64ISD::TC_RETURN.

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  uint64_t AfterCSRPopSize = ArgumentPopSize;
  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  // Assume we can't combine the last pop with the sp restore.

  if (!CombineSPBump && PrologueSaveSize != 0) {
    MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
    // Converting the last ldp to a post-index ldp is valid only if the last
    // ldp's offset is 0.
    const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
    // If the offset is 0, convert it to a post-index ldp.
    if (OffsetOp.getImm() == 0) {
      convertCalleeSaveRestoreToSPPrePostIncDec(MBB, Pop, DL, TII,
                                                PrologueSaveSize);
    } else {
      // If not, make sure to emit an add after the last ldp.
      // We're doing this by transferring the size to be restored from the
      // adjustment *before* the CSR pops to the adjustment *after* the CSR
      // pops.
      AfterCSRPopSize += PrologueSaveSize;
    }
  }

  // Move past the restores of the callee-saved registers.
  // If we plan on combining the sp bump of the local stack size and the callee
  // save stack size, we might need to adjust the CSR save and restore offsets.
  MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
  MachineBasicBlock::iterator Begin = MBB.begin();
  while (LastPopI != Begin) {
    --LastPopI;
    if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
      ++LastPopI;
      break;
    } else if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize());
  }

  // If there is a single SP update, insert it before the ret and we're done.
  if (CombineSPBump) {
    emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP,
                    AArch64::SP, NumBytes + AfterCSRPopSize, TII,
                    MachineInstr::FrameDestroy);
    return;
  }

  NumBytes -= PrologueSaveSize;
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  if (!hasFP(MF)) {
    bool RedZone = canUseRedZone(MF);
    // If this was a redzone leaf function, we don't need to restore the
    // stack pointer (but we may need to pop stack args for fastcc).
    if (RedZone && AfterCSRPopSize == 0)
      return;

    bool NoCalleeSaveRestore = PrologueSaveSize == 0;
    int StackRestoreBytes = RedZone ? 0 : NumBytes;
    if (NoCalleeSaveRestore)
      StackRestoreBytes += AfterCSRPopSize;

    // If we were able to combine the local stack pop with the argument pop,
    // then we're done.
    bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;

    // If we're done after this, make sure to help the load store optimizer.
    if (Done)
      adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);

    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
                    StackRestoreBytes, TII, MachineInstr::FrameDestroy);
    if (Done)
      return;

    NumBytes = 0;
  }

  // Restore the original stack pointer.
  // FIXME: Rather than doing the math here, we should instead just use
  // non-post-indexed loads for the restores if we aren't actually going to
  // be able to save any instructions.
  if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
                    -AFI->getCalleeSavedStackSize() + 16, TII,
                    MachineInstr::FrameDestroy);
  else if (NumBytes)
    emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes, TII,
                    MachineInstr::FrameDestroy);

  // This must be placed after the callee-save restore code because that code
  // assumes the SP is at the same location as it was after the callee-save
  // save code in the prologue.
  if (AfterCSRPopSize) {
    // Find an insertion point for the first ldp so that it goes before the
    // shadow call stack epilog instruction. This ensures that the restore of
    // lr from x18 is placed after the restore from sp.
    auto FirstSPPopI = MBB.getFirstTerminator();
    while (FirstSPPopI != Begin) {
      auto Prev = std::prev(FirstSPPopI);
      if (Prev->getOpcode() != AArch64::LDRXpre ||
          Prev->getOperand(0).getReg() == AArch64::SP)
        break;
      FirstSPPopI = Prev;
    }

    adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);

    emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
                    AfterCSRPopSize, TII, MachineInstr::FrameDestroy);
  }
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
/// debug info. It's the same as what we use for resolving the code-gen
/// references for now. FIXME: This can go wrong when references are
/// SP-relative and simple call frames aren't used.
int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                 int FI,
                                                 unsigned &FrameReg) const {
  return resolveFrameIndexReference(MF, FI, FrameReg);
}

int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
                                                     int FI, unsigned &FrameReg,
                                                     bool PreferFP) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
  int FPOffset = MFI.getObjectOffset(FI) + FixedObject + 16;
  int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
  bool isFixed = MFI.isFixedObjectIndex(FI);
  bool isCSR = !isFixed && MFI.getObjectOffset(FI) >=
                               -((int)AFI->getCalleeSavedStackSize());

  // Use frame pointer to reference fixed objects. Use it for locals if
  // there are VLAs or a dynamically realigned SP (and thus the SP isn't
  // reliable as a base). Make sure useFPForScavengingIndex() does the
  // right thing for the emergency spill slot.
  bool UseFP = false;
  if (AFI->hasStackFrame()) {
    // Note: Keeping the following as multiple 'if' statements rather than
    // merging to a single expression for readability.
    //
    // Argument access should always use the FP.
    if (isFixed) {
      UseFP = hasFP(MF);
    } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
      // References to the CSR area must use FP if we're re-aligning the stack
      // since the dynamically-sized alignment padding is between the SP/BP and
      // the CSR area.
      assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
      UseFP = true;
    } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
      // If the FPOffset is negative, we have to keep in mind that the
      // available offset range for negative offsets is smaller than for
      // positive ones. If an offset is available via the FP and the SP, use
      // whichever is closest.
      bool FPOffsetFits = FPOffset >= -256;
      PreferFP |= Offset > -FPOffset;

      if (MFI.hasVarSizedObjects()) {
        // If we have variable sized objects, we can use either FP or BP, as
        // the SP offset is unknown. We can use the base pointer if we have
        // one and FP is not preferred. If not, we're stuck with using FP.
        bool CanUseBP = RegInfo->hasBasePointer(MF);
        if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
          UseFP = PreferFP;
        else if (!CanUseBP) // Can't use BP. Forced to use FP.
          UseFP = true;
        // else we can use BP and FP, but the offset from FP won't fit.
        // That will make us scavenge registers which we can probably avoid by
        // using BP. If it won't fit for BP either, we'll scavenge anyway.
      } else if (FPOffset >= 0) {
        // Use SP or FP, whichever gives us the best chance of the offset
        // being in range for direct access. If the FPOffset is positive,
        // that'll always be best, as the SP will be even further away.
        UseFP = true;
      } else {
        // We have the choice between FP and (SP or BP).
        if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
          UseFP = true;
      }
    }
  }

  assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) ||
          !UseFP) &&
         "In the presence of dynamic stack pointer realignment, "
         "non-argument/CSR objects cannot be accessed through the frame "
         "pointer");

  if (UseFP) {
    FrameReg = RegInfo->getFrameRegister(MF);
    return FPOffset;
  }

  // Use the base pointer if we have one.
  if (RegInfo->hasBasePointer(MF))
    FrameReg = RegInfo->getBaseRegister();
  else {
    assert(!MFI.hasVarSizedObjects() &&
           "Can't use SP when we have var sized objects.");
    FrameReg = AArch64::SP;
    // If we're using the red zone for this function, the SP won't actually
    // be adjusted, so the offsets will be negative. They're also all
    // within range of the signed 9-bit immediate instructions.
    if (canUseRedZone(MF))
      Offset -= AFI->getLocalStackSize();
  }

  return Offset;
}

static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
  // Do not set a kill flag on values that are also marked as live-in. This
  // happens with the @llvm.returnaddress intrinsic and with arguments passed
  // in callee saved registers.
  // Omitting the kill flags is conservatively correct even if the live-in
  // is not used after all.
  bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
  return getKillRegState(!IsLiveIn);
}

static bool produceCompactUnwindFrame(MachineFunction &MF) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  AttributeList Attrs = MF.getFunction().getAttributes();
  return Subtarget.isTargetMachO() &&
         !(Subtarget.getTargetLowering()->supportSwiftError() &&
           Attrs.hasAttrSomewhere(Attribute::SwiftError));
}

namespace {

struct RegPairInfo {
  unsigned Reg1 = AArch64::NoRegister;
  unsigned Reg2 = AArch64::NoRegister;
  int FrameIdx;
  int Offset;
  bool IsGPR;

  RegPairInfo() = default;

  bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};

} // end anonymous namespace

static void computeCalleeSaveRegisterPairs(
    MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
    bool &NeedShadowCallStackProlog) {

  if (CSI.empty())
    return;

  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  CallingConv::ID CC = MF.getFunction().getCallingConv();
  unsigned Count = CSI.size();
  (void)CC;
  // MachO's compact unwind format relies on all registers being stored in
  // pairs.
  assert((!produceCompactUnwindFrame(MF) ||
          CC == CallingConv::PreserveMost ||
          (Count & 1) == 0) &&
         "Odd number of callee-saved regs to spill!");
  int Offset = AFI->getCalleeSavedStackSize();

  for (unsigned i = 0; i < Count; ++i) {
    RegPairInfo RPI;
    RPI.Reg1 = CSI[i].getReg();

    assert(AArch64::GPR64RegClass.contains(RPI.Reg1) ||
           AArch64::FPR64RegClass.contains(RPI.Reg1));
    RPI.IsGPR = AArch64::GPR64RegClass.contains(RPI.Reg1);

    // Add the next reg to the pair if it is in the same register class.
    if (i + 1 < Count) {
      unsigned NextReg = CSI[i + 1].getReg();
      if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) ||
          (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg)))
        RPI.Reg2 = NextReg;
    }

    // If either of the registers to be saved is the lr register, it means that
    // we also need to save lr in the shadow call stack.
    if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
        MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
      if (!MF.getSubtarget<AArch64Subtarget>().isX18Reserved())
        report_fatal_error("Must reserve x18 to use shadow call stack");
      NeedShadowCallStackProlog = true;
    }

    // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
    // list to come in sorted by frame index so that we can issue the store
    // pair instructions directly. Assert if we see anything otherwise.
    //
    // The order of the registers in the list is controlled by
    // getCalleeSavedRegs(), so they will always be in-order, as well.
    assert((!RPI.isPaired() ||
            (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
           "Out of order callee saved regs!");

    // MachO's compact unwind format relies on all registers being stored in
    // adjacent register pairs.
    assert((!produceCompactUnwindFrame(MF) ||
            CC == CallingConv::PreserveMost ||
            (RPI.isPaired() &&
             ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
              RPI.Reg1 + 1 == RPI.Reg2))) &&
           "Callee-save registers not saved as adjacent register pair!");

    RPI.FrameIdx = CSI[i].getFrameIdx();

    if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
      // Round up size of non-pair to pair size if we need to pad the
      // callee-save area to ensure 16-byte alignment.
      Offset -= 16;
      assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
      MFI.setObjectAlignment(RPI.FrameIdx, 16);
      AFI->setCalleeSaveStackHasFreeSpace(true);
    } else
      Offset -= RPI.isPaired() ? 16 : 8;
    assert(Offset % 8 == 0);
    RPI.Offset = Offset / 8;
    assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
           "Offset out of bounds for LDP/STP immediate");

    RegPairs.push_back(RPI);
    if (RPI.isPaired())
      ++i;
  }
}

bool AArch64FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    const std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  bool NeedShadowCallStackProlog = false;
  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
                                 NeedShadowCallStackProlog);
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (NeedShadowCallStackProlog) {
    // Shadow call stack prolog: str x30, [x18], #8
    BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
        .addReg(AArch64::X18, RegState::Define)
        .addReg(AArch64::LR)
        .addReg(AArch64::X18)
        .addImm(8)
        .setMIFlag(MachineInstr::FrameSetup);

    // This instruction also makes x18 live-in to the entry block.
    MBB.addLiveIn(AArch64::X18);
  }

  for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
       ++RPII) {
    RegPairInfo RPI = *RPII;
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;
    unsigned StrOpc;

    // Issue sequence of spills for cs regs. The first spill may be converted
    // to a pre-decrement store later by emitPrologue if the callee-save stack
    // area allocation can't be combined with the local stack area allocation.
    // For example:
    //    stp     x22, x21, [sp, #0]     // addImm(+0)
    //    stp     x20, x19, [sp, #16]    // addImm(+2)
    //    stp     fp, lr, [sp, #32]      // addImm(+4)
    // Rationale: This sequence saves uop updates compared to a sequence of
    // pre-increment spills like stp xi,xj,[sp,#-16]!
    // Note: Similar rationale and sequence for restores in epilog.
    if (RPI.IsGPR)
      StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
    else
      StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
    LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
    if (!MRI.isReserved(Reg1))
      MBB.addLiveIn(Reg1);
    if (RPI.isPaired()) {
      if (!MRI.isReserved(Reg2))
        MBB.addLiveIn(Reg2);
      MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOStore, 8, 8));
    }
    MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8], where factor*8 is implicit
        .setMIFlag(MachineInstr::FrameSetup);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOStore, 8, 8));
  }
  return true;
}

bool AArch64FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    std::vector<CalleeSavedInfo> &CSI,
    const TargetRegisterInfo *TRI) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  DebugLoc DL;
  SmallVector<RegPairInfo, 8> RegPairs;

  if (MI != MBB.end())
    DL = MI->getDebugLoc();

  bool NeedShadowCallStackProlog = false;
  computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
                                 NeedShadowCallStackProlog);

  auto EmitMI = [&](const RegPairInfo &RPI) {
    unsigned Reg1 = RPI.Reg1;
    unsigned Reg2 = RPI.Reg2;

    // Issue sequence of restores for cs regs. The last restore may be
    // converted to a post-increment load later by emitEpilogue if the
    // callee-save stack area allocation can't be combined with the local
    // stack area allocation.
    // For example:
    //    ldp     fp, lr, [sp, #32]      // addImm(+4)
    //    ldp     x20, x19, [sp, #16]    // addImm(+2)
    //    ldp     x22, x21, [sp, #0]     // addImm(+0)
    // Note: see comment in spillCalleeSavedRegisters()
    unsigned LdrOpc;
    if (RPI.IsGPR)
      LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
    else
      LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
    LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
               if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
               dbgs() << ") -> fi#(" << RPI.FrameIdx;
               if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
               dbgs() << ")\n");

    MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
    if (RPI.isPaired()) {
      MIB.addReg(Reg2, getDefRegState(true));
      MIB.addMemOperand(MF.getMachineMemOperand(
          MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx + 1),
          MachineMemOperand::MOLoad, 8, 8));
    }
    MIB.addReg(Reg1, getDefRegState(true))
        .addReg(AArch64::SP)
        .addImm(RPI.Offset) // [sp, #offset*8] where the factor*8 is implicit
        .setMIFlag(MachineInstr::FrameDestroy);
    MIB.addMemOperand(MF.getMachineMemOperand(
        MachinePointerInfo::getFixedStack(MF, RPI.FrameIdx),
        MachineMemOperand::MOLoad, 8, 8));
  };

  if (ReverseCSRRestoreSeq)
    for (const RegPairInfo &RPI : reverse(RegPairs))
      EmitMI(RPI);
  else
    for (const RegPairInfo &RPI : RegPairs)
      EmitMI(RPI);

  if (NeedShadowCallStackProlog) {
    // Shadow call stack epilog: ldr x30, [x18, #-8]!
    BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
        .addReg(AArch64::X18, RegState::Define)
        .addReg(AArch64::LR, RegState::Define)
        .addReg(AArch64::X18)
        .addImm(-8)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  return true;
}

void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                                BitVector &SavedRegs,
                                                RegScavenger *RS) const {
  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned UnspilledCSGPR = AArch64::NoRegister;
  unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

  unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
                                ? RegInfo->getBaseRegister()
                                : (unsigned)AArch64::NoRegister;

  unsigned SpillEstimate = SavedRegs.count();
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    unsigned PairedReg = CSRegs[i ^ 1];
    if (Reg == BasePointerReg)
      SpillEstimate++;
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
      SpillEstimate++;
  }
  SpillEstimate += 2; // Conservatively include FP+LR in the estimate.
  unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;

  // The frame record needs to be created by saving the appropriate registers.
  if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
    SavedRegs.set(AArch64::FP);
    SavedRegs.set(AArch64::LR);
  }

  unsigned ExtraCSSpill = 0;
  // Figure out which callee-saved registers to save/restore.
  for (unsigned i = 0; CSRegs[i]; ++i) {
    const unsigned Reg = CSRegs[i];

    // Add the base pointer register to SavedRegs if it is callee-save.
    if (Reg == BasePointerReg)
      SavedRegs.set(Reg);

    bool RegUsed = SavedRegs.test(Reg);
    unsigned PairedReg = CSRegs[i ^ 1];
    if (!RegUsed) {
      if (AArch64::GPR64RegClass.contains(Reg) &&
          !RegInfo->isReservedReg(MF, Reg)) {
        UnspilledCSGPR = Reg;
        UnspilledCSGPRPaired = PairedReg;
      }
      continue;
    }

    // MachO's compact unwind format relies on all registers being stored in
    // pairs.
    // FIXME: the usual format is actually better if unwinding isn't needed.
    if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
      SavedRegs.set(PairedReg);
      if (AArch64::GPR64RegClass.contains(PairedReg) &&
          !RegInfo->isReservedReg(MF, PairedReg))
        ExtraCSSpill = PairedReg;
    }
  }

  LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
             for (unsigned Reg : SavedRegs.set_bits())
               dbgs() << ' ' << printReg(Reg, RegInfo);
             dbgs() << "\n";);

  // If any callee-saved registers are used, the frame cannot be eliminated.
  unsigned NumRegsSpilled = SavedRegs.count();
  bool CanEliminateFrame = NumRegsSpilled == 0;

  // The CSR spill slots have not been allocated yet, so estimateStackSize
  // won't include them.
  unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
  LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
  unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
  bool BigStack = (CFSize > EstimatedStackSizeLimit);
  if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
    AFI->setHasStackFrame(true);

  // Estimate if we might need to scavenge a register at some point in order
  // to materialize a stack offset. If so, either spill one additional
  // callee-saved register or reserve a special spill slot to facilitate
  // register scavenging. If we already spilled an extra callee-saved register
  // above to keep the number of spills even, we don't need to do anything else
  // here.
  if (BigStack) {
    if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
      LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
                        << " to get a scratch register.\n");
      SavedRegs.set(UnspilledCSGPR);
      // MachO's compact unwind format relies on all registers being stored in
      // pairs, so if we need to spill one extra for BigStack, then we need to
      // store the pair.
      if (produceCompactUnwindFrame(MF))
        SavedRegs.set(UnspilledCSGPRPaired);
      ExtraCSSpill = UnspilledCSGPRPaired;
      NumRegsSpilled = SavedRegs.count();
    }

    // If we didn't find an extra callee-saved register to spill, create
    // an emergency spill slot.
    if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
      const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
      const TargetRegisterClass &RC = AArch64::GPR64RegClass;
      unsigned Size = TRI->getSpillSize(RC);
      unsigned Align = TRI->getSpillAlignment(RC);
      int FI = MFI.CreateStackObject(Size, Align, false);
      RS->addScavengingFrameIndex(FI);
      LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
                        << " as the emergency spill slot.\n");
    }
  }

  // Round up to register pair alignment to avoid additional SP adjustment
1519 AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); 1520 } 1521 1522 bool AArch64FrameLowering::enableStackSlotScavenging( 1523 const MachineFunction &MF) const { 1524 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); 1525 return AFI->hasCalleeSaveStackFreeSpace(); 1526 } 1527