/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86_64.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO

MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers.
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO

MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers.
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO

// For x86-64, the CFA is rsp+8, the address above the pushed return address on the stack.

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq MACRO_LITERAL(4 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for save all callee save frame method.
    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
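    /*
     * For reference, the kSaveAllCalleeSaves frame built above looks like this
     * (offsets relative to rsp, derived from the pushes and stores; 96 bytes total):
     *   +88: return address
     *   +80: r15    +72: r14    +64: r13    +56: r12    +48: rbp    +40: rbx
     *   +8..+32: xmm12-xmm15 (8 bytes each)
     *   +0:  ArtMethod* (kSaveAllCalleeSaves runtime method)
     */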
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
     */
MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq LITERAL(8 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI.
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq MACRO_LITERAL(16 + 12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
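    /*
     * For reference, the kSaveRefsAndArgs frame built above looks like this
     * (offsets relative to rsp, derived from the pushes and stores; 208 bytes total):
     *   +200: return address
     *   +192: r15    +184: r14    +176: r13    +168: r12
     *   +160: r9     +152: r8     +144: rsi    +136: rbp
     *   +128: rbx    +120: rdx    +112: rcx
     *   +16..+104: xmm0-xmm7 and xmm12-xmm15 (8 bytes each)
     *   +8:  second ArtMethod* slot (padding)
     *   +0:  ArtMethod*
     */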
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO

MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R14 and R15 are already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
#if defined(__APPLE__)
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    subq MACRO_LITERAL(8 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq \runtime_method_offset(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
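    /*
     * For reference, the kSaveEverything frame built above looks like this
     * (offsets relative to rsp, derived from the pushes and stores; 272 bytes total):
     *   +264: return address
     *   +144..+256: rax, rcx, rdx, rbx, rbp, rsi, rdi, r8-r15
     *               (8 bytes each; rax lowest, r15 highest)
     *   +16..+136: xmm0-xmm15 (8 bytes each)
     *   +8:  stack alignment padding
     *   +0:  ArtMethod* (kSaveEverything runtime method)
     */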
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R15 is already saved.
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED \runtime_method_offset
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
MACRO1(SETUP_SAVE_EVERYTHING_FRAME, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED \runtime_method_offset
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
    // Restore FPRs. Method and padding are still on the stack.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    addq MACRO_LITERAL(16 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
    // (Thread*) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
    UNREACHABLE
END_MACRO
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
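    /*
     * For reference, the C signatures implied by the argument set-up above are
     * roughly (a sketch; the authoritative declarations live in the runtime sources):
     *   artThrowNullPointerExceptionFromCode(Thread* self)
     *   artThrowDivZeroFromCode(Thread* self)
     *   artThrowStackOverflowFromCode(Thread* self)
     * The one- and two-argument variants simply append the Thread* after the managed
     * arguments already sitting in rdi/rsi.
     */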
    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, this_object, Thread*, SP)

    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread
    movq %rsp, %rcx                    // pass SP

    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*, SP)
    // save the code pointer
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
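    /*
     * Note on the two-word return convention used above: the helper returns the
     * resolved ArtMethod* in rax and its entry point in rdx. The body then swaps
     * the pair (rax -> rdi, rdx -> rax) so that, after the frame restore (which
     * does not touch rdi), the method sits in rdi where managed code expects
     * arg0/Method*, and the code pointer sits in rax for the tail jump.
     */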
    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1:  // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    // Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO

    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1:  // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
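    /*
     * Worked example of the two loops above, derived from the ASCII compares
     * ('D' = 68, 'F' = 70, 'J' = 74): for a method with shorty "LDJI" (object
     * return; double, long, int arguments), the return character is skipped and
     * then:
     *   - XMM pass: 'D' loads xmm0 and advances arg_array by 8; 'J' skips 8;
     *     'I' skips 4.
     *   - GPR pass: 'D' skips 8; 'J' loads the 64-bit GPR (movq); 'I' loads the
     *     32-bit GPR (movl).
     * In art_quick_invoke_stub below this fills xmm0 with the double, rdx with the
     * long and ecx with the int, while rsi already holds "this".
     */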
    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10   // R10 := shorty + 1 ; ie skip return arg character.
    leaq 4(%rsi), %r11  // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp  // Save rbp.
    PUSH r8   // Save r8/result*.
    PUSH r9   // Save r9/shorty*.
    PUSH rbx  // Save native callee save rbx.
    PUSH r12  // Save native callee save r12.
    PUSH r13  // Save native callee save r13.
    PUSH r14  // Save native callee save r14.
    PUSH r15  // Save native callee save r15.
    movq %rsp, %rbp  // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx         // Reserve space for return addr, StackReference<method>, rbp,
                                    // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx  // Align frame size to 16 bytes.
    subl LITERAL(72), %edx          // Remove space for return address, rbp, r8, r9, rbx, r12,
                                    // r13, r14, and r15.
    subq %rdx, %rsp                 // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)  // Store null for method*.

    movl %r10d, %ecx   // Place size of args in rcx.
    movq %rdi, %rax    // rax := method to be called
    movq %rsi, %r11    // r11 := arg_array
    leaq 8(%rsp), %rdi // rdi is pointing just above the ArtMethod* in the stack
                       // arguments.
    // Copy arg array into stack.
    rep movsb          // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10  // r10 := shorty + 1 ; ie skip return arg character.
    movq %rax, %rdi    // rdi := method to be called
    movl (%r11), %esi  // rsi := this pointer
    addq LITERAL(4), %r11  // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)  // Call the method.
    movq %rbp, %rsp  // Restore stack pointer.
    POP r15   // Pop r15.
    POP r14   // Pop r14.
    POP r13   // Pop r13.
    POP r12   // Pop r12.
    POP rbx   // Pop rbx.
    POP r9    // Pop r9 - shorty*.
    POP r8    // Pop r8 - result*.
    POP rbp   // Pop rbp.
    cmpb LITERAL(68), (%r9)  // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)  // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)  // Store the result assuming it's a long, int or Object*.
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)  // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)  // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
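    /*
     * Worked example of the frame-size arithmetic above: for 32 bytes of
     * arguments, rdx becomes (32 + 100) & ~15 = 128, minus 72 = 56 bytes reserved
     * below the saved registers: 8 for the null ArtMethod* slot, 32 for the copied
     * argument array, and 16 of padding. Together with the return address (8
     * bytes) and the eight saved registers (64 bytes) the frame totals 128 bytes,
     * a multiple of 16 as required.
     */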
    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or null if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10  // R10 := shorty + 1 ; ie skip return arg character.
    movq %rsi, %r11    // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp  // Save rbp.
    PUSH r8   // Save r8/result*.
    PUSH r9   // Save r9/shorty*.
    PUSH rbx  // Save rbx.
    PUSH r12  // Save r12.
    PUSH r13  // Save r13.
    PUSH r14  // Save r14.
    PUSH r15  // Save r15.
    movq %rsp, %rbp  // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx         // Reserve space for return addr, StackReference<method>, rbp,
                                    // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx  // Align frame size to 16 bytes.
    subl LITERAL(72), %edx          // Remove space for return address, rbp, r8, r9, rbx, r12,
                                    // r13, r14, and r15.
    subq %rdx, %rsp                 // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)  // Store null for method*.

    movl %r10d, %ecx   // Place size of args in rcx.
    movq %rdi, %rax    // rax := method to be called
    movq %rsi, %r11    // r11 := arg_array
    leaq 8(%rsp), %rdi // rdi is pointing just above the ArtMethod* in the
                       // stack arguments.
    // Copy arg array into stack.
    rep movsb          // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10  // r10 := shorty + 1 ; ie skip return arg character.
    movq %rax, %rdi    // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)  // Call the method.
    movq %rbp, %rsp  // Restore stack pointer.
    POP r15   // Pop r15.
    POP r14   // Pop r14.
    POP r13   // Pop r13.
    POP r12   // Pop r12.
    POP rbx   // Pop rbx.
    POP r9    // Pop r9 - shorty*.
    POP r8    // Pop r8 - result*.
    POP rbp   // Pop rbp.
    cmpb LITERAL(68), (%r9)  // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)  // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)  // Store the result assuming it's a long, int or Object*.
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)  // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)  // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
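    /*
     * For reference, the register assignments documented above follow the SysV
     * argument order, i.e. the stubs are callable from C++ with roughly this
     * prototype (a sketch; see the runtime's stub declarations for the
     * authoritative version):
     *   void art_quick_invoke_stub(ArtMethod* method, uint32_t* args,
     *                              uint32_t args_size, Thread* self,
     *                              JValue* result, const char* shorty);
     * art_quick_invoke_static_stub takes the same parameters, but its argument
     * array starts directly with the first argument instead of "this".
     */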
    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs
     *   rsi = fprs
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp  // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp  // Skip rsp.
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp  // Load stack pointer.
    ret        // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump

MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME         // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8   // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
    // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
    // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
    // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %rcx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME       // restore frame up to return address
    CALL_MACRO(return_macro)           // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
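    /*
     * In the downcall macros above, the managed arguments arrive in
     * rdi/rsi/rdx/rcx and are left untouched; Thread::Current() is appended in
     * the next free SysV argument register: rsi for one argument, rdx for two,
     * rcx for three and r8 for four. Only the Thread* has to be materialized
     * before the call.
     */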
// Macro for string and type resolution and initialization.
MACRO3(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME \runtime_method_offset  // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                    // pass string/type index
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg0, Thread*)
    testl %eax, %eax                   // If result is null, deliver the OOME.
    jz 1f
    CFI_REMEMBER_STATE
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX  // restore frame up to return address
    ret
    CFI_RESTORE_STATE
    CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)  // workaround for clang bug: 31975598
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT, c_name, cxx_name)
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \c_name, \cxx_name, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
END_MACRO

MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax  // rax == 0 ?
    jz 1f             // if rax == 0 goto 1
    ret               // return
1:                    // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax  // eax == 0 ?
    jnz 1f            // if eax != 0 goto 1
    ret               // return
1:                    // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx  // get exception field
    testq %rcx, %rcx  // rcx == 0 ?
    jnz 1f            // if rcx != 0 goto 1
    ret               // return
1:                    // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have x86_64 specific asm.
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
    // Check if the thread local allocation stack has room.
    movq %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae .Lslow_path\c_name
    // Load the object size.
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
    // Check if the size is for a thread local allocation. Also does the
    // initialized and finalizable checks.
    cmpl LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja .Lslow_path\c_name
    // Compute the rosalloc bracket index from the size.
    shrq LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
    // Load the rosalloc run (r9). Subtract __SIZEOF_POINTER__ to subtract one
    // from rax as there is no 0 byte run and the size is already aligned.
    movq (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
    // Load the free list head (rax). This will be the return val.
    movq (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq %rax, %rax
    jz .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
    // Push the new object onto the thread local allocation stack and increment
    // the thread local allocation stack top.
    movl %eax, (%rcx)
    addq LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
    // Load the next pointer of the head and update the list head with the next
    // pointer.
    movq ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
    // Store the class pointer in the header. This also overwrites the next
    // pointer. The offsets are asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    // Decrement the size of the free list.
    decl (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
    // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME               // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi        // pass Thread::Current()
    call CALLVAR(cxx_name)                   // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME             // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER  // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
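    /*
     * In C-like pseudocode, the rosalloc fast path above is roughly (a sketch
     * derived from the assembly; the names are illustrative, not the runtime's):
     *   if (self->alloc_stack_top >= self->alloc_stack_end) goto slow_path;
     *   size_t size = klass->object_size_alloc_fast_path;
     *   if (size > kMaxThreadLocalBracketSize) goto slow_path;
     *   Run* run = self->rosalloc_runs[(size >> kQuantumShift) - 1];
     *   Object* obj = run->free_list.head;
     *   if (obj == null) goto slow_path;
     *   *self->alloc_stack_top++ = obj;   // push on the thread-local alloc stack
     *   run->free_list.head = obj->next;  // unlink from the free list
     *   obj->klass = klass;               // overwrites the next pointer
     *   run->free_list.size--;
     *   return obj;
     */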
// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO

// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8  // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx  // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx  // Add size to pos; note that these are both 32 bit ints, so
                     // overflow will cause the add to be past the end of the
                     // thread local region.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx  // Check if it fits.
    ja RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)  // Update thread_local_pos.
    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)    // Increase thread_local_objects.
    // Store the class pointer in the header. No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret  // Fast path succeeded.
END_MACRO

// The fast path code for art_quick_alloc_array_region_tlab.
// Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
// Free temps: RCX, RDX, R8
// Output: RAX: return value.
MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %rcx  // rcx = thread
    // Mask out the unaligned part to make sure we are 8 byte aligned.
    andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
    movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
    addq %rax, %r9
    cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9  // Check if it fits.
    ja RAW_VAR(slowPathLabel)
    movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)             // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)  // Increase thread_local_objects.
    // Store the class pointer in the header. No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
    ret  // Fast path succeeded.
END_MACRO

// The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
// and art_quick_alloc_object_{resolved, initialized}_region_tlab.
MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
    SETUP_SAVE_REFS_ONLY_FRAME               // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi        // pass Thread::Current()
    call CALLVAR(cxx_name)                   // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME             // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER  // return or deliver exception
END_MACRO

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
.Lart_quick_alloc_object_resolved_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
END_FUNCTION art_quick_alloc_object_resolved_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called with CC if the GC is not active.
DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
.Lart_quick_alloc_object_initialized_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
END_FUNCTION art_quick_alloc_object_initialized_tlab
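    /*
     * In C-like pseudocode, the TLAB object fast path above is roughly (a sketch
     * derived from the assembly; the names are illustrative):
     *   size_t size = klass->object_size_alloc_fast_path;
     *   if (self->tlab_pos + size > self->tlab_end) goto slow_path;  // unsigned compare
     *   Object* obj = self->tlab_pos;
     *   self->tlab_pos += size;
     *   self->tlab_objects++;
     *   obj->klass = klass;  // no fence needed on x86
     *   return obj;
     */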
MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx  // Load component type.
    UNPOISON_HEAP_REF ecx
    movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx  // Load primitive type.
    shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx   // Get component size shift.
    movq %rsi, %r9
    salq %cl, %r9  // Calculate array count shifted.
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
    // Add 4 extra bytes if we are doing a long array: (shift + 1) & 4 is 4 exactly
    // when the component size shift is 3, and 0 otherwise.
    addq MACRO_LITERAL(1), %rcx
    andq MACRO_LITERAL(4), %rcx
#if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif
    addq %rcx, %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_8)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_16)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(1), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_32)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(2), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO

MACRO0(COMPUTE_ARRAY_SIZE_64)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    movq %rsi, %r9
    salq MACRO_LITERAL(3), %r9
    // Add array header + alignment rounding.
    addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
END_MACRO
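    /*
     * Worked example for COMPUTE_ARRAY_SIZE_UNKNOWN above: for a long[] of length
     * 3 the component size shift is 3, so r9 = 3 << 3 = 24, plus
     * MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK for the header and
     * rounding, plus 4 because MIRROR_LONG_ARRAY_DATA_OFFSET sits 4 bytes above
     * the int data offset. The low bits are then masked off against
     * OBJECT_ALIGNMENT_MASK_TOGGLED64 in ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE,
     * yielding the 8-byte-aligned allocation size.
     */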
MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
    DEFINE_FUNCTION VAR(c_entrypoint)
    // RDI: mirror::Class* klass, RSI: int32_t component_count
    // RDX, RCX, R8, R9: free. RAX: return val.
    CALL_MACRO(size_setup)
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
.Lslow_path\c_entrypoint:
    SETUP_SAVE_REFS_ONLY_FRAME               // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx        // pass Thread::Current()
    call CALLVAR(cxx_name)                   // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME             // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER  // return or deliver exception
    END_FUNCTION VAR(c_entrypoint)
END_MACRO

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64

GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
.Lart_quick_alloc_object_resolved_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
END_FUNCTION art_quick_alloc_object_resolved_region_tlab

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
    // Fast path region tlab allocation.
    // RDI: mirror::Class* klass
    // RDX, RSI, RCX, R8, R9: free. RAX: return val.
    ASSERT_USE_READ_BARRIER
    // No read barrier since the caller is responsible for that.
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
.Lart_quick_alloc_object_initialized_region_tlab_slow_path:
    ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
END_FUNCTION art_quick_alloc_object_initialized_region_tlab

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO

DEFINE_FUNCTION art_quick_lock_object
    testl %edi, %edi  // Null check object/rdi.
    jz .Lslow_lock
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
    jne .Lslow_lock  // Slow path if either of the two high bits is set.
    movl %ecx, %edx  // save lock word (edx) to keep read barrier bits.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    test %ecx, %ecx
    jnz .Lalready_thin  // Lock word contains a thin lock.
    // unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax                  // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx  // edx := thread id
    or %eax, %edx                    // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
    jnz .Lretry_lock  // cmpxchg failed, retry.
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx  // ecx := thread id
    cmpw %cx, %dx                    // do we hold the lock already?
    jne .Lslow_lock
    movl %edx, %ecx  // copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx       // increment recursion count
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx   // overflowed if the upper bit (28) is set
    jne .Lslow_lock  // count overflowed so go slow
    movl %edx, %eax  // copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx  // increment recursion count again for real.
    // update lockword, cmpxchg necessary for read barrier bits.
    lock cmpxchg %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz .Lretry_lock  // cmpxchg failed, retry.
    ret
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object
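    /*
     * For reference, the thin lock word layout assumed by the fast paths above
     * and below (a sketch derived from the masks and comments in the code):
     *   - the 2 high bits hold the lock state (non-zero means the object has a
     *     monitor or other non-thin state, so we take the slow path),
     *   - below them sit the gc/read-barrier state bits (the overflow check
     *     above tests bit 28),
     *   - the middle bits hold the thin lock recursion count,
     *   - the low 16 bits hold the owning thread id (hence the cmpw comparisons).
     */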
DEFINE_FUNCTION art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)  // artLockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object_no_inline

DEFINE_FUNCTION art_quick_unlock_object
    testl %edi, %edi  // null check object/edi
    jz .Lslow_unlock
.Lretry_unlock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx                  // edx := thread id
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx
    jnz .Lslow_unlock  // lock word contains a monitor
    cmpw %cx, %dx      // does the thread id match?
    jne .Lslow_unlock
    movl %ecx, %edx    // copy the lock word to detect new count of 0.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
    jae .Lrecursive_thin_unlock
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax    // eax: old lock word.
    andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
#ifndef USE_READ_BARRIER
    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz .Lretry_unlock  // cmpxchg failed, retry.
#endif
    ret
.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax    // eax: old lock word.
    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
#ifndef USE_READ_BARRIER
    mov %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz .Lretry_unlock  // cmpxchg failed, retry.
#endif
    ret
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object
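    /*
     * In C-like pseudocode, the unlock fast path above is roughly (a sketch
     * derived from the assembly; the names are illustrative):
     *   uint32_t lw = obj->lock_word;
     *   if ((lw & kStateMask) != 0 || owner_id(lw) != self->id) goto slow_path;
     *   if (count(lw) == 0)
     *     new_lw = lw & kGcStateMask;        // drop thread id, keep gc bits
     *   else
     *     new_lw = lw - kThinLockCountOne;   // decrement recursion count
     *   // plain store, or cmpxchg when read barriers are enabled:
     *   obj->lock_word = new_lw;
     */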
DEFINE_FUNCTION art_quick_unlock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object_no_inline

DEFINE_FUNCTION art_quick_check_instance_of
    // We could check the super classes here but that is usually already checked in the caller.
    PUSH rdi  // Save args for exc
    PUSH rsi
    subq LITERAL(8), %rsp  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
    testq %rax, %rax
    jz .Lthrow_class_cast_exception  // jump forward if not assignable
    CFI_REMEMBER_STATE
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp  // pop arguments
    CFI_ADJUST_CFA_OFFSET(-24)
    ret
    CFI_RESTORE_STATE  // Reset unwind info so following code unwinds.

.Lthrow_class_cast_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(8), %rsp  // pop padding
    CFI_ADJUST_CFA_OFFSET(-8)
    POP rsi  // Pop arguments
    POP rdi

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_check_instance_of


// Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
MACRO2(POP_REG_NE, reg, exclude_reg)
    .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
      addq MACRO_LITERAL(8), %rsp
      CFI_ADJUST_CFA_OFFSET(-8)
    .else
      POP RAW_VAR(reg)
    .endif
END_MACRO
/*
 * Macro to insert read barrier, used in art_quick_aput_obj.
 * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
 * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
 * 64b PUSH/POP and 32b argument.
 * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
 *
 * As with the art_quick_aput_obj function, the 64b versions are in comments.
 */
MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
#ifdef USE_READ_BARRIER
    PUSH rax                              // save registers that might be used
    PUSH rdi
    PUSH rsi
    PUSH rdx
    PUSH rcx
    SETUP_FP_CALLEE_SAVE_FRAME
    // Outgoing argument set up
    // movl REG_VAR(ref_reg32), %edi     // pass ref, no-op for now since parameter ref is unused
    // // movq REG_VAR(ref_reg64), %rdi
    movl REG_VAR(obj_reg), %esi           // pass obj_reg
    // movq REG_VAR(obj_reg), %rsi
    movl MACRO_LITERAL((RAW_VAR(offset))), %edx  // pass offset, double parentheses are necessary
    // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
    call SYMBOL(artReadBarrierSlow)       // artReadBarrierSlow(ref, obj_reg, offset)
    // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
    .ifnc RAW_VAR(dest_reg32), eax
    // .ifnc RAW_VAR(dest_reg64), rax
      movl %eax, REG_VAR(dest_reg32)      // save loaded ref in dest_reg
      // movq %rax, REG_VAR(dest_reg64)
    .endif
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP_REG_NE rcx, RAW_VAR(dest_reg64)   // Restore registers except dest_reg
    POP_REG_NE rdx, RAW_VAR(dest_reg64)
    POP_REG_NE rsi, RAW_VAR(dest_reg64)
    POP_REG_NE rdi, RAW_VAR(dest_reg64)
    POP_REG_NE rax, RAW_VAR(dest_reg64)
#else
    movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
    // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
    UNPOISON_HEAP_REF RAW_VAR(dest_reg32)  // UNPOISON_HEAP_REF only takes a 32b register
#endif  // USE_READ_BARRIER
END_MACRO
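/*
 * Roughly, art_quick_aput_obj below implements the following (illustrative C sketch;
 * field and helper names are made up), with the array in RDI/EDI, the index in RSI/ESI
 * and the value in RDX/EDX:
 *
 *   if (value == null) { array->data[index] = null; return; }
 *   if (value->klass == array->klass->component_type ||
 *       artIsAssignableFromCode(array->klass->component_type, value->klass)) {
 *     array->data[index] = value;
 *     card_table[(uintptr_t)array >> CARD_TABLE_CARD_SHIFT] = dirty;  // write barrier
 *   } else {
 *     artThrowArrayStoreException(array, value, self);  // does not return
 *   }
 */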
DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                      // store of null
    // test %rdx, %rdx
    jz .Ldo_aput_null
    READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
    READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
    // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
    // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
    cmpl %eax, %ecx                       // value's type == array's component type - trivial assignability
#else
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx  // value's type == array's component type - trivial assignability
    // cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
#endif
    jne .Lcheck_assignability
.Ldo_aput:
    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
    // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    // shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
    // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    SETUP_FP_CALLEE_SAVE_FRAME

#if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
    // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
    movl %eax, %esi                       // Pass arg2 = value's class.
    // movq %rax, %rsi
#else
    // "Uncompress" = do nothing, as already zero-extended on load.
    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
#endif
    movq %rcx, %rdi                       // Pass arg1 = array's component type.

    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP rdx
    POP rsi
    POP rdi

    POISON_HEAP_REF edx
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
    // movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
    // shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
    movb %dl, (%rdx, %rdi)                // Note: this assumes that top 32b of %rdi are zero
    // movb %dl, (%rdx, %rdi)
    ret
    CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)     // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    POP rdx
    POP rsi
    POP rdi

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME     // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                       // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx     // Pass arg 3 = Thread::Current().
                                          // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException)  // (array, value, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_aput_obj

// TODO: This is quite silly on X86_64 now.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)               // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy

DEFINE_FUNCTION art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  // save everything for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi     // pass Thread::Current()
    call SYMBOL(artTestSuspendFromCode)   // (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME         // restore frame up to return address
    ret
END_FUNCTION art_quick_test_suspend

UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.
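// Each macro below expands to a stub that forwards its GPR arguments to the named C++
// entrypoint, appending Thread::Current() read from %gs. Setters report a status code in
// EAX (hence RETURN_IF_EAX_ZERO); getters return the loaded value and rethrow via
// RETURN_OR_DELIVER_PENDING_EXCEPTION when the slow path left an exception pending.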

THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO

TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO

ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx     // Pass Thread::Current().
    movq %rsp, %rcx                       // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler)  // (proxy method, receiver, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    movq %rax, %xmm0                      // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
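// Note on the RAX-to-XMM0 copy above: the stub does not know the proxied method's return
// type, so the 64-bit result is mirrored into the FP return register as well, and the
// caller simply reads whichever register its signature dictates.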
/*
 * Called to resolve an imt conflict.
 * rdi is the conflict ArtMethod.
 * rax is a hidden argument that holds the target interface method's dex method index.
 *
 * Note that this stub writes to r10 and rdi.
 */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    movq __SIZEOF_POINTER__(%rsp), %r10   // Load referrer.
    // If the method is obsolete, just go through the dex cache miss slow path.
    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
    testl LITERAL(ACC_OBSOLETE_METHOD), ART_METHOD_ACCESS_FLAGS_OFFSET(%r10)
    jnz .Limt_conflict_trampoline_dex_cache_miss
    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r10), %r10d  // Load declaring class (no read barrier).
    movl MIRROR_CLASS_DEX_CACHE_OFFSET(%r10), %r10d      // Load the DexCache (without read barrier).
    UNPOISON_HEAP_REF r10d
    movq MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET(%r10), %r10  // Load the resolved methods.
    mov %eax, %r11d                       // Remember method index in R11.
    andl LITERAL(METHOD_DEX_CACHE_SIZE_MINUS_ONE), %eax  // Calculate DexCache method slot index.
    shll LITERAL(1), %eax                 // Multiply by 2 as entries have size 2 * __SIZEOF_POINTER__.
    leaq 0(%r10, %rax, __SIZEOF_POINTER__), %r10  // Load DexCache method slot address.
    PUSH rdx                              // Preserve RDX as we need to clobber it by LOCK CMPXCHG16B.
    mov %rcx, %rdx                        // Make RDX:RAX == RCX:RBX so that LOCK CMPXCHG16B makes no changes.
    mov %rbx, %rax                        // (The actual value does not matter.)
    lock cmpxchg16b (%r10)                // Relaxed atomic load RDX:RAX from the dex cache slot.
    movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
    cmp %rdx, %r11                        // Compare method index to see if we had a DexCache method hit.
    jne .Limt_conflict_trampoline_dex_cache_miss
.Limt_table_iterate:
    cmpq %rax, 0(%rdi)
    jne .Limt_table_next_entry
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    movq __SIZEOF_POINTER__(%rdi), %rdi
    CFI_REMEMBER_STATE
    POP rdx
    jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
    CFI_RESTORE_STATE
.Limt_table_next_entry:
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cmpq LITERAL(0), 0(%rdi)
    jz .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
    jmp .Limt_table_iterate
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    CFI_REMEMBER_STATE
    POP rdx
    movq %rax, %rdi                       // Load interface method
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
    CFI_RESTORE_STATE
.Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here,
    // artLookupResolvedMethod() is not allowed to walk the stack.

    // Save GPR args and ImtConflictTable; RDX is already saved.
    PUSH r9                               // Quick arg 5.
    PUSH r8                               // Quick arg 4.
    PUSH rsi                              // Quick arg 1.
    PUSH rcx                              // Quick arg 3.
    PUSH rdi                              // ImtConflictTable
    // Save FPR args and callee-saves, align stack to 16B.
    subq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8 + 8)
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm12, 64(%rsp)                 // XMM12-15 are callee-save in ART compiled code ABI
    movq %xmm13, 72(%rsp)                 //   but caller-save in native ABI.
    movq %xmm14, 80(%rsp)
    movq %xmm15, 88(%rsp)

    movq %r11, %rdi                       // Pass method index.
    movq 12 * 8 + 8 + 6 * 8 + 8(%rsp), %rsi  // Pass referrer.
    call SYMBOL(artLookupResolvedMethod)  // (uint32_t method_index, ArtMethod* referrer)

    // Restore FPRs.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm12
    movq 72(%rsp), %xmm13
    movq 80(%rsp), %xmm14
    movq 88(%rsp), %xmm15
    addq MACRO_LITERAL(12 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8 + 8))
    // Restore ImtConflictTable and GPR args.
    POP rdi
    POP rcx
    POP rsi
    POP r8
    POP r9

    cmp LITERAL(0), %rax                  // If the method wasn't resolved,
    je .Lconflict_trampoline              //   skip the lookup and go to artInvokeInterfaceTrampoline().
    jmp .Limt_table_iterate
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
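/*
 * The table walk above is a linear scan over (interface method, implementation) pairs,
 * conceptually (illustrative C sketch; the struct layout is implied by the
 * 2 * __SIZEOF_POINTER__ stride and is not spelled out here):
 *
 *   struct Entry { ArtMethod* interface_method; ArtMethod* implementation; };
 *   for (Entry* e = table; e->interface_method != NULL; ++e) {
 *     if (e->interface_method == target) goto e->implementation->quick_code;
 *   }
 *   // Not found: let artInvokeInterfaceTrampoline populate the table and dispatch.
 */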
DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline)  // (called, receiver, Thread*, SP)
    movq %rax, %r10                       // Remember returned code pointer in R10.
    movq (%rsp), %rdi                     // Load called method into RDI.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    testq %r10, %r10                      // If the code pointer is null, deliver the pending exception.
    jz 1f
    jmp *%r10                             // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
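// artQuickResolutionTrampoline resolves and links the callee, then returns the code
// pointer to invoke; a null return means resolution threw, so the stub falls through to
// deliver the pending exception instead of tail-calling.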
/* Generic JNI frame layout:
 *
 * #-------------------#
 * |                   |
 * | caller method...  |
 * #-------------------# <--- SP on entry
 *
 *           |
 *           V
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | R15               |    callee save
 * | R14               |    callee save
 * | R13               |    callee save
 * | R12               |    callee save
 * | R9                |    arg5
 * | R8                |    arg4
 * | RSI/R6            |    arg1
 * | RBP/R5            |    callee save
 * | RBX/R3            |    callee save
 * | RDX/R2            |    arg2
 * | RCX/R1            |    arg3
 * | XMM7              |    float arg 8
 * | XMM6              |    float arg 7
 * | XMM5              |    float arg 6
 * | XMM4              |    float arg 5
 * | XMM3              |    float arg 4
 * | XMM2              |    float arg 3
 * | XMM1              |    float arg 2
 * | XMM0              |    float arg 1
 * | RDI/Method*       |  <- sp
 * #-------------------#
 * | Scratch Alloca    |    5K scratch space
 * #---------#---------#
 * |         | sp*     |
 * | Tramp.  #---------#
 * | args    | thread  |
 * | Tramp.  #---------#
 * |         | method  |
 * #-------------------# <--- SP on artQuickGenericJniTrampoline
 *
 *           |
 *           v              artQuickGenericJniTrampoline
 *
 * #-------------------#
 * | caller method...  |
 * #-------------------#
 * | Return            |
 * | Callee-Save Data  |
 * #-------------------#
 * | handle scope      |
 * #-------------------#
 * | Method*           |    <--- (1)
 * #-------------------#
 * | local ref cookie  |    // 4B
 * | handle scope size |    // 4B   TODO: roll into call stack alignment?
 * #-------------------#
 * | JNI Call Stack    |
 * #-------------------# <--- SP on native call
 * |                   |
 * | Stack for Regs    |    The trampoline assembly will pop these values
 * |                   |    into registers for native call
 * #-------------------#
 * | Native code ptr   |
 * #-------------------#
 * | Free scratch      |
 * #-------------------#
 * | Ptr to (1)        |    <--- RSP
 * #-------------------#
 */
/*
 * Called to do a generic JNI down-call
 */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI

    movq %rsp, %rbp                       // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)

    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-2-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    //
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    //
    // New test: use 5K and release
    // 5k = 5120
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*, SP)
    //    rdi    rsi      <= C calling convention
    //  gs:...   rbp      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rbp, %rsi
    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    test %rax, %rax
    jz .Lexception_in_native

    // Release part of the alloca.
    movq %rdx, %rsp

    // pop from the register-passing alloca region
    // what's the right layout?
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    addq LITERAL(64), %rsp                // floating-point done

    // native call
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi     rdx       <= C calling convention
    //  gs:...    rax     xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call SYMBOL(artQuickGenericJniEndTrampoline)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Tear down the callee-save frame.
    // Load FPRs.
    // (XMM0 at 16(%rsp) is deliberately not reloaded: it holds the FP return value.)
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // was 80 bytes
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee saves and GPR args, mixed together to agree with core spills bitmap.
    POP rcx                               // Arg.
    POP rdx                               // Arg.
    POP rbx                               // Callee save.
    POP rbp                               // Callee save.
    POP rsi                               // Arg.
    POP r8                                // Arg.
    POP r9                                // Arg.
    POP r12                               // Callee save.
    POP r13                               // Callee save.
    POP r14                               // Callee save.
    POP r15                               // Callee save.
    // store into fpr, for when it's a fpr return...
    movq %rax, %xmm0
    ret
.Lexception_in_native:
    pushq %gs:THREAD_TOP_QUICK_FRAME_OFFSET
    addq LITERAL(-1), (%rsp)              // Remove the GenericJNI tag.
    movq (%rsp), %rsp
    CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a new save-all frame required by the runtime.
    call .Lexception_call
.Lexception_call:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
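/*
 * End to end, the generic JNI path above is roughly (illustrative C sketch; only the
 * entrypoints named in the code are real):
 *
 *   void* code = artQuickGenericJniTrampoline(self, sp);  // build handle scope,
 *   if (code == NULL) goto exception_in_native;           //   marshal args into alloca
 *   raw = code(...);                                      // args popped, native call
 *   result = artQuickGenericJniEndTrampoline(self, raw, fp_raw);
 *   if (self->exception != NULL) goto exception_in_native;
 */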
/*
 * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
 * of a quick call:
 * RDI = method being called / to bridge to.
 * RSI, RDX, RCX, R8, R9 are arguments to that method.
 */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi     // RSI := Thread::Current()
    movq %rsp, %rdx                       // RDX := sp
    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME      // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                      // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION   // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge

/*
 * Called to catch an attempt to invoke an obsolete method.
 * RDI = method being called.
 */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod

/*
 * Routine that intercepts method calls and returns.
 */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_SAVE_REFS_AND_ARGS_FRAME

    movq %rdi, %r12                       // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx     // Pass thread.
    movq %rsp, %rcx                       // Pass SP.

    call SYMBOL(artInstrumentationMethodEntryFromCode)  // (Method*, Object*, Thread*, SP)

    // %rax = result of call.
    testq %rax, %rax
    jz 1f

    movq %r12, %rdi                       // Reload method pointer.
    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  //   exit.

    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    jmp *%rax                             // Tail call to intended method.
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry
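// The entry stub above redirects the method's return: it overwrites the return-address
// slot of the freshly built frame with art_quick_instrumentation_exit, so the
// instrumented method "returns" into the exit stub below, which then asks
// artInstrumentationMethodExitFromCode for the genuine return pc (or a deoptimization pc).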
DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
    pushq LITERAL(0)                      // Push a fake return PC as there will be none on the stack.
    CFI_ADJUST_CFA_OFFSET(8)

    SETUP_SAVE_EVERYTHING_FRAME

    leaq 16(%rsp), %rcx                   // Pass floating-point result pointer, in kSaveEverything frame.
    leaq 144(%rsp), %rdx                  // Pass integer result pointer, in kSaveEverything frame.
    movq %rsp, %rsi                       // Pass SP.
    movq %gs:THREAD_SELF_OFFSET, %rdi     // Pass Thread.

    call SYMBOL(artInstrumentationMethodExitFromCode)  // (Thread*, SP, gpr_res*, fpr_res*)

    testq %rax, %rax                      // Check if we have a return-pc to go to. If we don't,
                                          //   then there was an exception.
    jz .Ldo_deliver_instrumentation_exception
    testq %rdx, %rdx
    jnz .Ldeoptimize
    // Normal return.
    movq %rax, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    ret
.Ldeoptimize:
    movq %rdx, FRAME_SIZE_SAVE_EVERYTHING-8(%rsp)  // Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    jmp SYMBOL(art_quick_deoptimize)
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_FUNCTION art_quick_instrumentation_exit

/*
 * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
 * will long jump to the upcall with a special exception of -1.
 */
DEFINE_FUNCTION art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME           // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi     // Pass Thread.
    call SYMBOL(artDeoptimize)            // (Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize

/*
 * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
 * will long jump to the interpreter bridge.
 */
DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME
    // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rsi     // Pass Thread.
    call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
    UNREACHABLE
END_FUNCTION art_quick_deoptimize_from_compiled_code
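// Neither deoptimization entrypoint returns: artDeoptimize and
// artDeoptimizeFromCompiledCode long-jump into the interpreter, which is why both stubs
// end in UNREACHABLE instead of tearing down the SAVE_EVERYTHING frame.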
/*
 * String's compareTo.
 *
 * On entry:
 *    rdi:   this string object (known non-null)
 *    rsi:   comp string object (known non-null)
 */
DEFINE_FUNCTION art_quick_string_compareto
    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
    /* Build pointers to the start of string data */
    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
#if (STRING_COMPRESSION_FEATURE)
    /* Distinguish the compressed/uncompressed cases */
    shrl LITERAL(1), %r8d
    jnc .Lstring_compareto_this_is_compressed
    shrl LITERAL(1), %r9d
    jnc .Lstring_compareto_that_is_compressed
    jmp .Lstring_compareto_both_not_compressed
.Lstring_compareto_this_is_compressed:
    shrl LITERAL(1), %r9d
    jnc .Lstring_compareto_both_compressed
    /* Comparison of this (8-bit) and that (16-bit) */
    mov %r8d, %eax
    subl %r9d, %eax
    mov %r8d, %ecx
    cmovg %r9d, %ecx
    /* Going into loop to compare each character */
    jecxz .Lstring_compareto_keep_length1  // check loop counter (if 0 then stop)
.Lstring_compareto_loop_comparison_this_compressed:
    movzbl (%edi), %r8d                   // move *(this_cur_char) byte to long
    movzwl (%esi), %r9d                   // move *(that_cur_char) word to long
    addl LITERAL(1), %edi                 // ++this_cur_char (8-bit)
    addl LITERAL(2), %esi                 // ++that_cur_char (16-bit)
    subl %r9d, %r8d
    loope .Lstring_compareto_loop_comparison_this_compressed
    cmovne %r8d, %eax                     // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length1:
    ret
.Lstring_compareto_that_is_compressed:
    movl %r8d, %eax
    subl %r9d, %eax
    mov %r8d, %ecx
    cmovg %r9d, %ecx
    /* Comparison of this (16-bit) and that (8-bit) */
    jecxz .Lstring_compareto_keep_length2  // check loop counter (if 0, don't compare)
.Lstring_compareto_loop_comparison_that_compressed:
    movzwl (%edi), %r8d                   // move *(this_cur_char) word to long
    movzbl (%esi), %r9d                   // move *(that_cur_char) byte to long
    addl LITERAL(2), %edi                 // ++this_cur_char (16-bit)
    addl LITERAL(1), %esi                 // ++that_cur_char (8-bit)
    subl %r9d, %r8d
    loope .Lstring_compareto_loop_comparison_that_compressed
    cmovne %r8d, %eax                     // return eax = *(this_cur_char) - *(that_cur_char)
.Lstring_compareto_keep_length2:
    ret
.Lstring_compareto_both_compressed:
    /* Calculate min length and count diff */
    movl %r8d, %ecx
    movl %r8d, %eax
    subl %r9d, %eax
    cmovg %r9d, %ecx
    jecxz .Lstring_compareto_keep_length3
    repe cmpsb
    je .Lstring_compareto_keep_length3
    movzbl -1(%edi), %eax                 // get last compared char from this string (8-bit)
    movzbl -1(%esi), %ecx                 // get last compared char from comp string (8-bit)
    jmp .Lstring_compareto_count_difference
#endif  // STRING_COMPRESSION_FEATURE
.Lstring_compareto_both_not_compressed:
    /* Calculate min length and count diff */
    movl %r8d, %ecx
    movl %r8d, %eax
    subl %r9d, %eax
    cmovg %r9d, %ecx
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to comp string data
     *   edi: pointer to this string data
     */
    jecxz .Lstring_compareto_keep_length3
    repe cmpsw                            // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    je .Lstring_compareto_keep_length3
    movzwl -2(%edi), %eax                 // get last compared char from this string (16-bit)
    movzwl -2(%esi), %ecx                 // get last compared char from comp string (16-bit)
.Lstring_compareto_count_difference:
    subl %ecx, %eax                       // return the difference
.Lstring_compareto_keep_length3:
    ret
END_FUNCTION art_quick_string_compareto
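/*
 * Every path above computes the java.lang.String.compareTo contract, roughly
 * (illustrative C sketch over the already-loaded lengths and data pointers):
 *
 *   int n = min(this_len, that_len);
 *   for (int i = 0; i < n; ++i) {
 *     if (this_data[i] != that_data[i]) return this_data[i] - that_data[i];
 *   }
 *   return this_len - that_len;
 */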
UNIMPLEMENTED art_quick_memcmp16

DEFINE_FUNCTION art_quick_instance_of
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                 // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artInstanceOfFromCode)    // (mirror::Object*, mirror::Class*)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_instance_of

// Create a function `name` calling the ReadBarrier::Mark routine,
// getting its argument and returning its result through register
// `reg`, saving and restoring all caller-save registers.
//
// The generated function follows a non-standard runtime calling
// convention:
// - register `reg` (which may be different from RDI) is used to pass
//   the (sole) argument of this function;
// - register `reg` (which may be different from RAX) is used to return
//   the result of this function (instead of RAX);
// - if `reg` is different from `rdi`, RDI is treated like a normal
//   (non-argument) caller-save register;
// - if `reg` is different from `rax`, RAX is treated like a normal
//   (non-result) caller-save register;
// - everything else is the same as in the standard runtime calling
//   convention (e.g. standard callee-save registers are preserved).
MACRO2(READ_BARRIER_MARK_REG, name, reg)
    DEFINE_FUNCTION VAR(name)
    // Null check so that we can load the lock word.
    testq REG_VAR(reg), REG_VAR(reg)
    jz .Lret_rb_\name
.Lnot_null_\name:
    // Check the mark bit, if it is 1 return.
    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
    jz .Lslow_rb_\name
    ret
.Lslow_rb_\name:
    PUSH rax
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
    // forwarding address one.
    // Taken ~25% of the time.
    jnae .Lret_forwarding_address\name

    // Save all potentially live caller-save core registers.
    movq 0(%rsp), %rax
    PUSH rcx
    PUSH rdx
    PUSH rsi
    PUSH rdi
    PUSH r8
    PUSH r9
    PUSH r10
    PUSH r11
    // Create space for caller-save floating-point registers.
    subq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(12 * 8)
    // Save all potentially live caller-save floating-point registers.
    movq %xmm0, 0(%rsp)
    movq %xmm1, 8(%rsp)
    movq %xmm2, 16(%rsp)
    movq %xmm3, 24(%rsp)
    movq %xmm4, 32(%rsp)
    movq %xmm5, 40(%rsp)
    movq %xmm6, 48(%rsp)
    movq %xmm7, 56(%rsp)
    movq %xmm8, 64(%rsp)
    movq %xmm9, 72(%rsp)
    movq %xmm10, 80(%rsp)
    movq %xmm11, 88(%rsp)
    SETUP_FP_CALLEE_SAVE_FRAME

    .ifnc RAW_VAR(reg), rdi
      movq REG_VAR(reg), %rdi             // Pass arg1 - obj from `reg`.
    .endif
    call SYMBOL(artReadBarrierMark)       // artReadBarrierMark(obj)
    .ifnc RAW_VAR(reg), rax
      movq %rax, REG_VAR(reg)             // Return result into `reg`.
    .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    addq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    // The overflow cleared the top bits.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
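// The forwarding-address trick above relies on LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW
// being chosen so that the addl overflows 32 bits only when the lock word is in the
// forwarding-address state; shifting the surviving bits back left then yields the
// to-space reference without any runtime call.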
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15

DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                 // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow)       // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow

DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                 // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow)  // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow

/*
 * On stack replacement stub.
 * On entry:
 *   [sp] = return address
 *   rdi = stack to copy
 *   rsi = size of stack
 *   rdx = pc to call
 *   rcx = JValue* result
 *   r8 = shorty
 *   r9 = thread
 *
 * Note that the native C ABI already aligned the stack to 16-byte.
 */
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles.
    PUSH rbp                              // Save rbp.
    PUSH rcx                              // Save rcx/result*.
    PUSH r8                               // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)                      // Push null for ArtMethod*.
    CFI_ADJUST_CFA_OFFSET(8)
    movl %esi, %ecx                       // rcx := size of stack
    movq %rdi, %rsi                       // rsi := stack to copy
    movq %rsp, %rbp                       // Save stack pointer to RBP for CFI use in .Losr_entry.
    call .Losr_entry
    CFI_REMEMBER_STATE

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    cmpb LITERAL(68), (%r8)               // Test if result type char == 'D'.
    je .Losr_return_double_quick
    cmpb LITERAL(70), (%r8)               // Test if result type char == 'F'.
    je .Losr_return_float_quick
    movq %rax, (%rcx)                     // Store the result assuming it's a long, int or Object*.
    ret
.Losr_return_double_quick:
    movsd %xmm0, (%rcx)                   // Store the double floating point result.
    ret
.Losr_return_float_quick:
    movss %xmm0, (%rcx)                   // Store the floating point result.
    ret
.Losr_entry:
    CFI_RESTORE_STATE                     // Restore CFI state; however, since the call has pushed the
    CFI_DEF_CFA_REGISTER(rbp)             //   return address we need to switch the CFA register to RBP.

    subl LITERAL(8), %ecx                 // Given stack size contains pushed frame pointer, subtract it.
    subq %rcx, %rsp
    movq %rsp, %rdi                       // rdi := beginning of stack
    rep movsb                             // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx
END_FUNCTION art_quick_osr_stub
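// The OSR stub builds a fake quick frame (a null ArtMethod* plus a byte-for-byte copy of
// the interpreter-prepared frame via `rep movsb`), jumps to the OSR entry pc in RDX, and
// on return stores the result through the JValue* according to the shorty's return-type
// character ('D', 'F', or anything else treated as a GPR value).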
DEFINE_FUNCTION art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves
    movq %gs:THREAD_SELF_OFFSET, %rdx     // pass Thread
    movq %rsp, %rcx                       // pass SP
    subq LITERAL(16), %rsp                // make space for JValue result
    CFI_ADJUST_CFA_OFFSET(16)
    movq LITERAL(0), (%rsp)               // initialize result
    movq %rsp, %rdi                       // store pointer to JValue result
    call SYMBOL(artInvokePolymorphic)     // artInvokePolymorphic(result, receiver, Thread*, SP)
                                          // RAX := return type descriptor character.
    subq LITERAL('A'), %rax               // Convert type descriptor character value to a zero based index.
    cmpb LITERAL('Z' - 'A'), %al          // Eliminate out of bounds options
    ja .Lcleanup_and_return
    movzbq %al, %rax
    leaq .Lhandler_table(%rip), %rcx      // Get the address of the handler table
    movslq (%rcx, %rax, 4), %rax          // Lookup handler offset relative to table
    addq %rcx, %rax                       // Add table address to yield handler address.
    jmpq *%rax                            // Jump to handler.

    .align 4
.Lhandler_table:                          // Table of type descriptor to handlers.
MACRO1(HANDLER_TABLE_OFFSET, handle_label)
    // NB some tools require 32-bits for relocations. Shouldn't need adjusting.
    .long RAW_VAR(handle_label) - .Lhandler_table
END_MACRO
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // L (object - references are compressed and only 32-bits)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)

.Lstore_boolean_result:
    movzbq (%rsp), %rax                   // Copy boolean result to the accumulator
    jmp .Lcleanup_and_return
.Lstore_char_result:
    movzwq (%rsp), %rax                   // Copy char result to the accumulator
    jmp .Lcleanup_and_return
.Lstore_float_result:
    movd (%rsp), %xmm0                    // Copy float result to the context restored by
    movd %xmm0, 32(%rsp)                  //   RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    jmp .Lcleanup_and_return
.Lstore_double_result:
    movsd (%rsp), %xmm0                   // Copy double result to the context restored by
    movsd %xmm0, 32(%rsp)                 //   RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    jmp .Lcleanup_and_return
.Lstore_long_result:
    movq (%rsp), %rax                     // Copy long result to the accumulator.
    // Fall-through
.Lcleanup_and_return:
    addq LITERAL(16), %rsp                // Pop space for JValue result.
    CFI_ADJUST_CFA_OFFSET(-16)
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic

// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
// Argument 0: RDI: The context pointer for ExecuteSwitchImpl.
// Argument 1: RSI: Pointer to the templated ExecuteSwitchImpl to call.
// Argument 2: RDX: The value of DEX PC (memory address of the method's bytecode).
DEFINE_FUNCTION ExecuteSwitchImplAsm
    PUSH rbx                              // Spill RBX
    movq %rdx, %rbx                       // RBX = DEX PC (callee save register)
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* RAX */, 3 /* RBX */, 0)

    call *%rsi                            // Call the wrapped function

    POP rbx                               // Restore RBX
    ret
END_FUNCTION ExecuteSwitchImplAsm