/* -----------------------------------------------------------------------
   darwin64.S - Copyright (c) 2006 Free Software Foundation, Inc.
   derived from unix64.S

   x86-64 Foreign Function Interface for Darwin.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   ``Software''), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
   OTHER DEALINGS IN THE SOFTWARE.
   ----------------------------------------------------------------------- */

#ifdef __x86_64__
#define LIBFFI_ASM
#include <fficonfig.h>
#include <ffi.h>

	.file	"darwin64.S"
	.text

/* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
		    void *raddr, void (*fnaddr)());

   Syntax: AT&T.  Register usage on entry, as read by the code below:
	%rdi	args    - base of the register save area
	%rsi	bytes   - ARGS+BYTES is the base of the local stack frame
	%rdx	flags   - low byte is the FFI_TYPE of the return value
	%rcx	raddr   - where the return value is stored
	%r8	fnaddr  - the function to call
	%r9d	copied to %eax as the number of SSE registers

   Layout of the area at ARGS, as consumed below:
	  0 ..  47	six integer argument registers
	 48 .. 175	%xmm0 - %xmm7, 16 bytes each
	176 ..		becomes the stack pointer for the call

   Bit o trickiness here -- ARGS+BYTES is the base of the stack frame
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd.

   Only caller-saved scratch registers (%rax, %r10, %r11) are used as
   temporaries; %rbp is saved in the local frame and restored.  */

	.align	3
	.globl	_ffi_call_unix64

_ffi_call_unix64:
LUW0:
	movq	(%rsp), %r10		/* Load return address.  */
	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
	movq	%rdx, (%rax)		/* Save flags.  */
	movq	%rcx, 8(%rax)		/* Save raddr.  */
	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
	movq	%r10, 24(%rax)		/* Relocate return address.  */
	movq	%rax, %rbp		/* Finalize local stack frame.  */
LUW1:
	/* %rdi has not been modified since entry, so it can be copied
	   directly; no callee-saved register is needed to carry it.  */
	movq	%rdi, %r10		/* Save a copy of the register area.  */
	movq	%r8, %r11		/* Save a copy of the target fn.  */
	movl	%r9d, %eax		/* Set number of SSE registers.  */

	/* Load up all argument registers.  */
	movq	(%r10), %rdi
	movq	8(%r10), %rsi
	movq	16(%r10), %rdx
	movq	24(%r10), %rcx
	movq	32(%r10), %r8
	movq	40(%r10), %r9
	testl	%eax, %eax
	jnz	Lload_sse
Lret_from_load_sse:

	/* Deallocate the reg arg area.  */
	leaq	176(%r10), %rsp

	/* Call the user function.  */
	call	*%r11

	/* Deallocate stack arg area; local stack frame in redzone.  */
	leaq	24(%rbp), %rsp

	movq	0(%rbp), %rcx		/* Reload flags.  */
	movq	8(%rbp), %rdi		/* Reload raddr.  */
	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
LUW2:

	/* The first byte of the flags contains the FFI_TYPE.  Dispatch
	   through a table of offsets relative to Lstore_table.  */
	movzbl	%cl, %r10d
	leaq	Lstore_table(%rip), %r11
	movslq	(%r11, %r10, 4), %r10
	addq	%r11, %r10
	jmp	*%r10

Lstore_table:
	.long	Lst_void-Lstore_table		/* FFI_TYPE_VOID */
	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_INT */
	.long	Lst_float-Lstore_table		/* FFI_TYPE_FLOAT */
	.long	Lst_double-Lstore_table		/* FFI_TYPE_DOUBLE */
	.long	Lst_ldouble-Lstore_table	/* FFI_TYPE_LONGDOUBLE */
	.long	Lst_uint8-Lstore_table		/* FFI_TYPE_UINT8 */
	.long	Lst_sint8-Lstore_table		/* FFI_TYPE_SINT8 */
	.long	Lst_uint16-Lstore_table		/* FFI_TYPE_UINT16 */
	.long	Lst_sint16-Lstore_table		/* FFI_TYPE_SINT16 */
	.long	Lst_uint32-Lstore_table		/* FFI_TYPE_UINT32 */
	.long	Lst_sint32-Lstore_table		/* FFI_TYPE_SINT32 */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_UINT64 */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_SINT64 */
	.long	Lst_struct-Lstore_table		/* FFI_TYPE_STRUCT */
	.long	Lst_int64-Lstore_table		/* FFI_TYPE_POINTER */

	/* Store handlers.  On entry %rdi = raddr and %rcx = flags; each
	   handler widens the value to 64 bits where applicable, stores
	   it at (%rdi) and returns to the caller of ffi_call_unix64.  */
	.text
	.align	3
Lst_void:
	ret
	.align	3
Lst_uint8:
	movzbq	%al, %rax
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_sint8:
	movsbq	%al, %rax
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_uint16:
	movzwq	%ax, %rax
	movq	%rax, (%rdi)
	ret				/* Must not fall into Lst_sint16,
					   which would sign-extend.  */
	.align	3
Lst_sint16:
	movswq	%ax, %rax
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_uint32:
	movl	%eax, %eax		/* 32-bit move zero-extends.  */
	movq	%rax, (%rdi)
	ret				/* Must not fall into Lst_sint32,
					   which would sign-extend.  */
	.align	3
Lst_sint32:
	cltq
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_int64:
	movq	%rax, (%rdi)
	ret
	.align	3
Lst_float:
	movss	%xmm0, (%rdi)
	ret
	.align	3
Lst_double:
	movsd	%xmm0, (%rdi)
	ret
	.align	3
Lst_ldouble:
	fstpt	(%rdi)
	ret
	.align	3
Lst_struct:
	leaq	-20(%rsp), %rsi		/* Scratch area in redzone.  */

	/* We have to locate the values now, and since we don't want to
	   write too much data into the user's return value, we spill the
	   value to a 16 byte scratch area first.  Bits 8, 9, and 10
	   control where the values are located.  Only one of the three
	   bits will be set; see ffi_prep_cif_machdep for the pattern.

	   After the conditional moves, %rax holds the first word and
	   %rdx the second:
	     0x100:  first = %xmm0, second = %rax
	     0x200:  first = %rax,  second = %xmm0
	     0x400:  first = %xmm0, second = %xmm1
	     none:   first = %rax,  second = %rdx  */
	movd	%xmm0, %r10
	movd	%xmm1, %r11
	testl	$0x100, %ecx
	cmovnz	%rax, %rdx
	cmovnz	%r10, %rax
	testl	$0x200, %ecx
	cmovnz	%r10, %rdx
	testl	$0x400, %ecx
	cmovnz	%r10, %rax
	cmovnz	%r11, %rdx
	movq	%rax, (%rsi)
	movq	%rdx, 8(%rsi)

	/* Bits 12-31 contain the true size of the structure.  Copy from
	   the scratch area to the true destination.  */
	shrl	$12, %ecx
	rep movsb
	ret

	/* Many times we can avoid loading any SSE registers at all.
	   It's not worth an indirect jump to load the exact set of
	   SSE registers needed; zero or all is a good compromise.  */
	.align	3
LUW3:
Lload_sse:
	movdqa	48(%r10), %xmm0
	movdqa	64(%r10), %xmm1
	movdqa	80(%r10), %xmm2
	movdqa	96(%r10), %xmm3
	movdqa	112(%r10), %xmm4
	movdqa	128(%r10), %xmm5
	movdqa	144(%r10), %xmm6
	movdqa	160(%r10), %xmm7
	jmp	Lret_from_load_sse

LUW4:

/* ffi_closure_unix64

   Entry point reached from a closure trampoline.  As used below:
	%r10		passed through as the first argument of
			ffi_closure_unix64_inner (set up by the trampoline)
	carry flag	set by the trampoline iff SSE registers are used
	%rdi..%r9, %xmm0..%xmm7	the incoming call's argument registers

   A 200 byte frame is allocated and laid out as:
	  0 ..  47	saved integer argument registers
	 48 .. 175	saved %xmm0 - %xmm7 (only when carry was set)
	176 ..		return value area handed to the inner function

   ffi_closure_unix64_inner is called with
	%rdi = %r10, %rsi = return value area, %rdx = register save area,
	%rcx = 208(%rsp), i.e. the address just above the return address
	of this function.
   Its result in %eax carries the FFI_TYPE in the low byte and, for
   structures, the register-class bits tested in Lld_struct.  */

	.align	3
	.globl	_ffi_closure_unix64

_ffi_closure_unix64:
LUW5:
	/* The carry flag is set by the trampoline iff SSE registers
	   are used.  Don't clobber it before the branch instruction.
	   (leaq and movq leave the flags untouched.)  */
	leaq	-200(%rsp), %rsp
LUW6:
	movq	%rdi, (%rsp)
	movq	%rsi, 8(%rsp)
	movq	%rdx, 16(%rsp)
	movq	%rcx, 24(%rsp)
	movq	%r8, 32(%rsp)
	movq	%r9, 40(%rsp)
	jc	Lsave_sse
Lret_from_save_sse:

	movq	%r10, %rdi
	leaq	176(%rsp), %rsi
	movq	%rsp, %rdx
	leaq	208(%rsp), %rcx
	call	_ffi_closure_unix64_inner

	/* Deallocate stack frame early; return value is now in redzone.
	   What was 176(%rsp) is -24(%rsp) from here on.  */
	addq	$200, %rsp
LUW7:

	/* The first byte of the return value contains the FFI_TYPE.
	   Dispatch through a table of offsets relative to Lload_table.  */
	movzbl	%al, %r10d
	leaq	Lload_table(%rip), %r11
	movslq	(%r11, %r10, 4), %r10
	addq	%r11, %r10
	jmp	*%r10

Lload_table:
	.long	Lld_void-Lload_table		/* FFI_TYPE_VOID */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_INT */
	.long	Lld_float-Lload_table		/* FFI_TYPE_FLOAT */
	.long	Lld_double-Lload_table		/* FFI_TYPE_DOUBLE */
	.long	Lld_ldouble-Lload_table		/* FFI_TYPE_LONGDOUBLE */
	.long	Lld_int8-Lload_table		/* FFI_TYPE_UINT8 */
	.long	Lld_int8-Lload_table		/* FFI_TYPE_SINT8 */
	.long	Lld_int16-Lload_table		/* FFI_TYPE_UINT16 */
	.long	Lld_int16-Lload_table		/* FFI_TYPE_SINT16 */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_UINT32 */
	.long	Lld_int32-Lload_table		/* FFI_TYPE_SINT32 */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_UINT64 */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_SINT64 */
	.long	Lld_struct-Lload_table		/* FFI_TYPE_STRUCT */
	.long	Lld_int64-Lload_table		/* FFI_TYPE_POINTER */

	/* Load handlers.  Each one moves the value from the return value
	   area at -24(%rsp) into the return register(s) and returns.  */
	.text
	.align	3
Lld_void:
	ret
	.align	3
Lld_int8:
	movzbl	-24(%rsp), %eax
	ret
	.align	3
Lld_int16:
	movzwl	-24(%rsp), %eax
	ret
	.align	3
Lld_int32:
	movl	-24(%rsp), %eax
	ret
	.align	3
Lld_int64:
	movq	-24(%rsp), %rax
	ret
	.align	3
Lld_float:
	movss	-24(%rsp), %xmm0
	ret
	.align	3
Lld_double:
	movsd	-24(%rsp), %xmm0
	ret
	.align	3
Lld_ldouble:
	fldt	-24(%rsp)
	ret
	.align	3
Lld_struct:
	/* There are four possibilities here, %rax/%rdx, %xmm0/%rax,
	   %rax/%xmm0, %xmm0/%xmm1.  We collapse two by always loading
	   both rdx and xmm1 with the second word.  For the remaining,
	   bit 8 set means xmm0 gets the second word, and bit 9 means
	   that rax gets the second word.  */
	movq	-24(%rsp), %rcx
	movq	-16(%rsp), %rdx
	movq	-16(%rsp), %xmm1
	testl	$0x100, %eax
	cmovnz	%rdx, %rcx
	movd	%rcx, %xmm0
	testl	$0x200, %eax		/* Flags survive the movq below.  */
	movq	-24(%rsp), %rax
	cmovnz	%rdx, %rax
	ret

	/* See the comment above Lload_sse; the same logic applies here.  */
	.align	3
LUW8:
Lsave_sse:
	movdqa	%xmm0, 48(%rsp)
	movdqa	%xmm1, 64(%rsp)
	movdqa	%xmm2, 80(%rsp)
	movdqa	%xmm3, 96(%rsp)
	movdqa	%xmm4, 112(%rsp)
	movdqa	%xmm5, 128(%rsp)
	movdqa	%xmm6, 144(%rsp)
	movdqa	%xmm7, 160(%rsp)
	jmp	Lret_from_save_sse

LUW9:

/* Hand-written DWARF call frame information: one CIE shared by the two
   FDEs that describe ffi_call_unix64 and ffi_closure_unix64.  All code
   distances are expressed as differences of the LUWn labels above.  */

.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
EH_frame1:
	.set	L$set$0,LECIE1-LSCIE1	/* CIE Length */
	.long	L$set$0
LSCIE1:
	.long	0x0		/* CIE Identifier Tag */
	.byte	0x1		/* CIE Version */
	.ascii	"zR\0"		/* CIE Augmentation */
	.byte	0x1		/* uleb128 0x1; CIE Code Alignment Factor */
	.byte	0x78		/* sleb128 -8; CIE Data Alignment Factor */
	.byte	0x10		/* CIE RA Column */
	.byte	0x1		/* uleb128 0x1; Augmentation size */
	.byte	0x10		/* FDE Encoding (pcrel sdata4) */
	.byte	0xc		/* DW_CFA_def_cfa, %rsp offset 8 */
	.byte	0x7		/* uleb128 0x7 */
	.byte	0x8		/* uleb128 0x8 */
	.byte	0x90		/* DW_CFA_offset, column 0x10 */
	.byte	0x1
	.align	3
LECIE1:
	.globl	_ffi_call_unix64.eh
_ffi_call_unix64.eh:
LSFDE1:
	.set	L$set$1,LEFDE1-LASFDE1	/* FDE Length */
	.long	L$set$1
LASFDE1:
	.long	LASFDE1-EH_frame1	/* FDE CIE offset */
	.quad	LUW0-.			/* FDE initial location */
	.set	L$set$2,LUW4-LUW0	/* FDE address range */
	.quad	L$set$2
	.byte	0x0			/* Augmentation size */
	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$3,LUW1-LUW0
	.long	L$set$3

	/* New stack frame based off rbp.  This is an itty bit of unwind
	   trickery in that the CFA *has* changed.  There is no easy way
	   to describe it correctly on entry to the function.  Fortunately,
	   it doesn't matter too much since at all points we can correctly
	   unwind back to ffi_call.  Note that the location to which we
	   moved the return address is (the new) CFA-8, so from the
	   perspective of the unwind info, it hasn't moved.  */
	.byte	0xc			/* DW_CFA_def_cfa, %rbp offset 32 */
	.byte	0x6
	.byte	0x20
	.byte	0x80+6			/* DW_CFA_offset, %rbp offset 2*-8 */
	.byte	0x2
	.byte	0xa			/* DW_CFA_remember_state */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$4,LUW2-LUW1
	.long	L$set$4
	.byte	0xc			/* DW_CFA_def_cfa, %rsp offset 8 */
	.byte	0x7
	.byte	0x8
	.byte	0xc0+6			/* DW_CFA_restore, %rbp */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$5,LUW3-LUW2
	.long	L$set$5
	.byte	0xb			/* DW_CFA_restore_state */

	.align	3
LEFDE1:
	.globl	_ffi_closure_unix64.eh
_ffi_closure_unix64.eh:
LSFDE3:
	.set	L$set$6,LEFDE3-LASFDE3	/* FDE Length */
	.long	L$set$6
LASFDE3:
	.long	LASFDE3-EH_frame1	/* FDE CIE offset */
	.quad	LUW5-.			/* FDE initial location */
	.set	L$set$7,LUW9-LUW5	/* FDE address range */
	.quad	L$set$7
	.byte	0x0			/* Augmentation size */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$8,LUW6-LUW5
	.long	L$set$8
	.byte	0xe			/* DW_CFA_def_cfa_offset */
	.byte	208,1			/* uleb128 208 */
	.byte	0xa			/* DW_CFA_remember_state */

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$9,LUW7-LUW6
	.long	L$set$9
	.byte	0xe			/* DW_CFA_def_cfa_offset */
	.byte	0x8

	.byte	0x4			/* DW_CFA_advance_loc4 */
	.set	L$set$10,LUW8-LUW7
	.long	L$set$10
	.byte	0xb			/* DW_CFA_restore_state */

	.align	3
LEFDE3:
	.subsections_via_symbols

#endif /* __x86_64__ */