//=- AArch64CallingConv.td - Calling Conventions for AArch64 -*- tablegen -*-=//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This describes the calling conventions for the AArch64 architecture.
//
//===----------------------------------------------------------------------===//

/// CCIfAlign - Apply action A only when the argument's original alignment
/// compares equal to Align. Align is appended verbatim to the predicate text,
/// so it must be something that can follow `==` in that expression.
class CCIfAlign<string Align, CCAction A>
    : CCIf<!strconcat("ArgFlags.getOrigAlign() == ", Align), A>;

/// CCIfBigEndian - Apply action A only if we're in big endian mode, as
/// reported by the data layout of the machine function being processed.
class CCIfBigEndian<CCAction A>
    : CCIf<"State.getMachineFunction().getDataLayout().isBigEndian()", A>;

//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
//===----------------------------------------------------------------------===//

// Argument passing for the standard AAPCS64 convention. The rules are listed
// in the order they are tried, so their relative order is significant.
def CC_AArch64_AAPCS : CallingConv<[
  // Re-type pointers and a few vector types so that fewer distinct types have
  // to be handled by the rules that follow.
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
  CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,

  // Big endian vectors must be passed as if they were 1-element vectors so that
  // their lanes are in a consistent order.
  CCIfBigEndian<
      CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8], CCBitConvertToType<f64>>>,
  CCIfBigEndian<
      CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
               CCBitConvertToType<f128>>>,

  // An SRet is passed in X8, not X0 like a normal pointer parameter.
  CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,

  // Put ByVal arguments directly on the stack. Minimum size and alignment of a
  // slot is 64-bit.
  CCIfByVal<CCPassByVal<8, 8>>,

  // The 'nest' parameter, if any, is passed in X18.
  // Darwin uses X18 as the platform register and hence 'nest' isn't currently
  // supported there.
  CCIfNest<CCAssignToReg<[X18]>>,

  // Pass SwiftSelf in a callee saved register.
  CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,

  // A SwiftError is passed in X21.
  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,

  // Arguments flagged as needing consecutive registers are handed to the
  // custom handler named here.
  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,

  // Scalars and vectors go in registers while any remain: up to eight each of
  // GPR and FPR. Integers narrower than 32 bits are promoted to i32 first.
  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
  CCIfType<[i32],
           CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
                                   [X0, X1, X2, X3, X4, X5, X6, X7]>>,

  // i128 is split to two i64s, we can't fit half to register X7.
  CCIfType<[i64],
           CCIfSplit<CCAssignToRegWithShadow<[X0, X2, X4, X6],
                                             [X0, X1, X3, X5]>>>,

  // i128 is split to two i64s, and its stack alignment is 16 bytes.
  CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,

  CCIfType<[i64],
           CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
                                   [W0, W1, W2, W3, W4, W5, W6, W7]>>,
  CCIfType<[f16],
           CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f32],
           CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f64],
           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
           CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,

  // If more than will fit in registers, pass them on the stack instead. Every
  // slot here is at least 8 bytes in size and alignment.
  CCIfType<[i1, i8, i16, f16], CCAssignToStack<8, 8>>,
  CCIfType<[i32, f32], CCAssignToStack<8, 8>>,
  CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
           CCAssignToStack<8, 8>>,
  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
           CCAssignToStack<16, 16>>
]>;

// Return-value assignment for the AAPCS64 convention. Only register rules are
// listed; there is no stack fallback in this table.
def RetCC_AArch64_AAPCS : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
  CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,

  // A SwiftError value uses X21.
  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,

  // Big endian vectors must be passed as if they were 1-element vectors so that
  // their lanes are in a consistent order.
  CCIfBigEndian<
      CCIfType<[v2i32, v2f32, v4i16, v4f16, v8i8], CCBitConvertToType<f64>>>,
  CCIfBigEndian<
      CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
               CCBitConvertToType<f128>>>,

  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
  CCIfType<[i32],
           CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
                                   [X0, X1, X2, X3, X4, X5, X6, X7]>>,
  CCIfType<[i64],
           CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
                                   [W0, W1, W2, W3, W4, W5, W6, W7]>>,
  CCIfType<[f16],
           CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f32],
           CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f64],
           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f128, v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
           CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;

// Vararg functions on windows pass floats in integer registers: f16 and f32
// are promoted to f64, f64 is re-typed as i64, and everything is then handled
// by the regular AAPCS rules.
def CC_AArch64_Win64_VarArg : CallingConv<[
  CCIfType<[f16, f32], CCPromoteToType<f64>>,
  CCIfType<[f64], CCBitConvertToType<i64>>,
  CCDelegateTo<CC_AArch64_AAPCS>
]>;


// Darwin uses a calling convention which differs in only two ways
// from the standard one at this level:
//     + i128s (i.e. split i64s) don't need even registers.
//     + Stack slots are sized as needed rather than being at least 64-bit.
//
// NOTE(review): as written, this table additionally has no 'nest' rule and no
// big-endian vector rules, and it bit-converts f128 to v2i64.
def CC_AArch64_DarwinPCS : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
  CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,

  // An SRet is passed in X8, not X0 like a normal pointer parameter.
  CCIfSRet<CCIfType<[i64], CCAssignToRegWithShadow<[X8], [W8]>>>,

  // Put ByVal arguments directly on the stack. Minimum size and alignment of a
  // slot is 64-bit.
  CCIfByVal<CCPassByVal<8, 8>>,

  // Pass SwiftSelf in a callee saved register.
  CCIfSwiftSelf<CCIfType<[i64], CCAssignToRegWithShadow<[X20], [W20]>>>,

  // A SwiftError is passed in X21.
  CCIfSwiftError<CCIfType<[i64], CCAssignToRegWithShadow<[X21], [W21]>>>,

  // Arguments flagged as needing consecutive registers are handed to the
  // custom handler named here.
  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Block">>,

  // Scalars and vectors go in registers while any remain: up to eight each of
  // GPR and FPR. Integers narrower than 32 bits are promoted to i32 first.
  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
  CCIfType<[i32],
           CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
                                   [X0, X1, X2, X3, X4, X5, X6, X7]>>,

  // i128 is split to two i64s, we can't fit half to register X7.
  CCIfType<[i64],
           CCIfSplit<CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6],
                                             [W0, W1, W2, W3, W4, W5, W6]>>>,

  // i128 is split to two i64s, and its stack alignment is 16 bytes.
  CCIfType<[i64], CCIfSplit<CCAssignToStackWithShadow<8, 16, [X7]>>>,

  CCIfType<[i64],
           CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
                                   [W0, W1, W2, W3, W4, W5, W6, W7]>>,
  CCIfType<[f16],
           CCAssignToRegWithShadow<[H0, H1, H2, H3, H4, H5, H6, H7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f32],
           CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f64],
           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
           CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,

  // If more than will fit in registers, pass them on the stack instead. Slots
  // are 1, 2, 4, 8 or 16 bytes depending on the value type. The two smallest
  // sizes are selected by testing ValVT directly.
  CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>,
  CCIf<"ValVT == MVT::i16 || ValVT == MVT::f16", CCAssignToStack<2, 2>>,
  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
  CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8, v4f16],
           CCAssignToStack<8, 8>>,
  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
           CCAssignToStack<16, 16>>
]>;

// Darwin convention for variadic arguments: nothing in this table is assigned
// to a register.
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,
  CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
  CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,

  // Arguments flagged as needing consecutive registers are handed to the
  // stack-only custom handler named here.
  CCIfConsecutiveRegs<CCCustom<"CC_AArch64_Custom_Stack_Block">>,

  // Handle all scalar types as either i64 or f64.
  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
  CCIfType<[f16, f32], CCPromoteToType<f64>>,

  // Everything is on the stack.
  // i128 is split to two i64s, and its stack alignment is 16 bytes.
  CCIfType<[i64], CCIfSplit<CCAssignToStack<8, 16>>>,
  CCIfType<[i64, f64, v1i64, v2i32, v4i16, v8i8, v1f64, v2f32, v4f16],
           CCAssignToStack<8, 8>>,
  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, v2f64, v8f16],
           CCAssignToStack<16, 16>>
]>;

// The WebKit_JS calling convention only passes the first argument (the callee)
// in register and the remaining arguments on stack. We allow 32bit stack slots,
// so that WebKit can write partial values in the stack and define the other
// 32bit quantity as undef.
def CC_AArch64_WebKit_JS : CallingConv<[
  // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0).
  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
  CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>,
  CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>,

  // Pass the remaining arguments on the stack instead.
  CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
  CCIfType<[i64, f64], CCAssignToStack<8, 8>>
]>;

// Return-value assignment for WebKit_JS: i32, i64, f32 and f64 only, each in
// the first free register of its class.
def RetCC_AArch64_WebKit_JS : CallingConv<[
  CCIfType<[i32],
           CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
                                   [X0, X1, X2, X3, X4, X5, X6, X7]>>,
  CCIfType<[i64],
           CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
                                   [W0, W1, W2, W3, W4, W5, W6, W7]>>,
  CCIfType<[f32],
           CCAssignToRegWithShadow<[S0, S1, S2, S3, S4, S5, S6, S7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>,
  CCIfType<[f64],
           CCAssignToRegWithShadow<[D0, D1, D2, D3, D4, D5, D6, D7],
                                   [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>
]>;

//===----------------------------------------------------------------------===//
// ARM64 Calling Convention for GHC
//===----------------------------------------------------------------------===//

// This calling convention is specific to the Glasgow Haskell Compiler.
// The only documentation is the GHC source code, specifically the C header
// file:
//
//     https://github.com/ghc/ghc/blob/master/includes/stg/MachRegs.h
//
// which defines the registers for the Spineless Tagless G-Machine (STG) that
// GHC uses to implement lazy evaluation. The generic STG machine has a set of
// registers which are mapped to appropriate set of architecture specific
// registers for each CPU architecture.
//
// The STG Machine is documented here:
//
//    https://ghc.haskell.org/trac/ghc/wiki/Commentary/Compiler/GeneratedCode
//
// The AArch64 register mapping is under the heading "The ARMv8/AArch64 ABI
// register mapping".

def CC_AArch64_GHC : CallingConv<[
  CCIfType<[iPTR], CCBitConvertToType<i64>>,

  // Re-type the listed 64-bit vector types as f64, and the listed 128-bit
  // vector types (plus f128) as v2f64.
  CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32],
           CCBitConvertToType<f64>>,
  CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128],
           CCBitConvertToType<v2f64>>,

  // Floating-point and vector values: a fixed, small set of registers per type.
  CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
  CCIfType<[f32], CCAssignToReg<[S8, S9, S10, S11]>>,
  CCIfType<[f64], CCAssignToReg<[D12, D13, D14, D15]>>,

  // Promote i8/i16/i32 arguments to i64.
  CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,

  // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, SpLim
  CCIfType<[i64],
           CCAssignToReg<[X19, X20, X21, X22, X23,
                          X24, X25, X26, X27, X28]>>
]>;

// FIXME: LR is only callee-saved in the sense that *we* preserve it and are
// presumably a callee to someone. External functions may not do so, but this
// is currently safe since BL has LR as an implicit-def and what happens after a
// tail call doesn't matter.
//
// It would be better to model its preservation semantics properly (create a
// vreg on entry, use it in RET & tail call generation; make that vreg def if we
// end up saving LR as part of a call frame). Watch this space...
def CSR_AArch64_AAPCS
    : CalleeSavedRegs<(add LR, FP,
                           X19, X20, X21, X22, X23, X24, X25, X26, X27, X28,
                           D8, D9, D10, D11, D12, D13, D14, D15)>;

// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
// 'this' and the pointer return value are both passed in X0 in these cases,
// this can be partially modelled by treating X0 as a callee-saved register;
// only the resulting RegMask is used; the SaveList is ignored
//
// (For generic ARM 64-bit ABI code, clang will not generate constructors or
// destructors with 'this' returns, so this RegMask will not be used in that
// case)
def CSR_AArch64_AAPCS_ThisReturn
    : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X0)>;

// The AAPCS set with X21 removed.
def CSR_AArch64_AAPCS_SwiftError
    : CalleeSavedRegs<(sub CSR_AArch64_AAPCS, X21)>;

// The function used by Darwin to obtain the address of a thread-local variable
// guarantees more than a normal AAPCS function. x16 and x17 are used on the
// fast path for calculation, but other registers except X0 (argument/return)
// and LR (it is a call, after all) are preserved.
def CSR_AArch64_TLS_Darwin
    : CalleeSavedRegs<(add (sub (sequence "X%u", 1, 28), X16, X17),
                           FP,
                           (sequence "Q%u", 0, 31))>;

// We can only handle a register pair with adjacent registers, the register pair
// should belong to the same class as well. Since the access function on the
// fast path calls a function that follows CSR_AArch64_TLS_Darwin,
// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin.
def CSR_AArch64_CXX_TLS_Darwin
    : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
                           (sub (sequence "X%u", 1, 28), X15, X16, X17, X18),
                           (sequence "D%u", 0, 31))>;

// CSRs that are handled by prologue, epilogue.
def CSR_AArch64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add LR, FP)>;

// CSRs that are handled explicitly via copies.
def CSR_AArch64_CXX_TLS_Darwin_ViaCopy
    : CalleeSavedRegs<(sub CSR_AArch64_CXX_TLS_Darwin, LR, FP)>;

// The ELF stub used for TLS-descriptor access saves every feasible
// register. Only X0 and LR are clobbered.
def CSR_AArch64_TLS_ELF
    : CalleeSavedRegs<(add (sequence "X%u", 1, 28),
                           FP,
                           (sequence "Q%u", 0, 31))>;

// W0-W30 and WSP; X0-X28, FP, LR and SP; and B, H, S, D and Q registers 0-31.
def CSR_AArch64_AllRegs
    : CalleeSavedRegs<(add (sequence "W%u", 0, 30), WSP,
                           (sequence "X%u", 0, 28), FP, LR, SP,
                           (sequence "B%u", 0, 31),
                           (sequence "H%u", 0, 31),
                           (sequence "S%u", 0, 31),
                           (sequence "D%u", 0, 31),
                           (sequence "Q%u", 0, 31))>;

// The empty set.
def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;

// The AAPCS set plus X9-X15.
def CSR_AArch64_RT_MostRegs
    : CalleeSavedRegs<(add CSR_AArch64_AAPCS, (sequence "X%u", 9, 15))>;

// X0-X15 and X18-X28 (so not X16 or X17), FP, SP, and Q0-Q31. LR is not
// listed.
def CSR_AArch64_StackProbe_Windows
    : CalleeSavedRegs<(add (sequence "X%u", 0, 15),
                           (sequence "X%u", 18, 28),
                           FP, SP,
                           (sequence "Q%u", 0, 31))>;

// Variants of the standard calling conventions for shadow call stack.
// These all preserve x18 in addition to any other registers.
def CSR_AArch64_NoRegs_SCS
    : CalleeSavedRegs<(add CSR_AArch64_NoRegs, X18)>;

def CSR_AArch64_AllRegs_SCS
    : CalleeSavedRegs<(add CSR_AArch64_AllRegs, X18)>;

def CSR_AArch64_CXX_TLS_Darwin_SCS
    : CalleeSavedRegs<(add CSR_AArch64_CXX_TLS_Darwin, X18)>;

def CSR_AArch64_AAPCS_SwiftError_SCS
    : CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;

def CSR_AArch64_RT_MostRegs_SCS
    : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;

def CSR_AArch64_AAPCS_SCS
    : CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;