/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86_64.S"

#include "arch/quick_alloc_entrypoints.S"

MACRO0(ASSERT_USE_READ_BARRIER)
#if !defined(USE_READ_BARRIER)
    int3
    int3
#endif
END_MACRO

MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO

MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO

// For x86_64, the CFA is rsp+8, the address above the pushed return address on the stack.

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVES_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq MACRO_LITERAL(4 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for save all callee save frame method.
    movq RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
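
// Illustrative layout note for the frame built above (offsets from rsp once
// the macro completes):
//    0: ArtMethod*                         (8 bytes)
//    8: xmm12 .. 32: xmm15                 (4 FPRs * 8 bytes)
//   40: rbx, rbp, r12, r13, r14, r15       (6 callee-save GPRs * 8 bytes)
//   88: return address pushed by the caller's call
// Total: 8 + 4*8 + 6*8 + 8 = 96 bytes, matching the compile-time check above.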

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly)
     */
MACRO0(SETUP_SAVE_REFS_ONLY_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq LITERAL(8 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
    // Save FPRs.
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    movq RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 6 * 8 + 4 * 8 + 8 + 8)
#error "FRAME_SIZE_SAVE_REFS_ONLY(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

MACRO0(RESTORE_SAVE_REFS_ONLY_FRAME)
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs)
     */
MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME)
#if defined(__APPLE__)
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq MACRO_LITERAL(16 + 12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(16 + 12 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    movq RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET(%r10), %r10
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 11 * 8 + 12 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
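
// Size of the kSaveRefsAndArgs frame, worked out (illustrative note):
// 11 pushed GPRs (6 callee saves + 5 argument registers) = 88 bytes,
// 12 spilled FPRs (xmm0-xmm7 args plus xmm12-xmm15 callee saves) = 96 bytes,
// 16 bytes for the two ArtMethod* slots, and 8 bytes for the caller's return
// address: 88 + 96 + 16 + 8 = 208 bytes = FRAME_SIZE_SAVE_REFS_AND_ARGS.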

MACRO0(SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // Save FPRs.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO

MACRO0(RESTORE_SAVE_REFS_AND_ARGS_FRAME)
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R14 and R15 are already saved.
     */
MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED)
#if defined(__APPLE__)
    int3
    int3
#else
    // Save core registers from highest to lowest to agree with core spills bitmap.
    // R14 and R15, or at least placeholders for them, are already on the stack.
    PUSH r13
    PUSH r12
    PUSH r11
    PUSH r10
    PUSH r9
    PUSH r8
    PUSH rdi
    PUSH rsi
    PUSH rbp
    PUSH rbx
    PUSH rdx
    PUSH rcx
    PUSH rax
    // Create space for FPRs and stack alignment padding.
    subq MACRO_LITERAL(8 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 16 * 8)
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save FPRs.
    movq %xmm0, 8(%rsp)
    movq %xmm1, 16(%rsp)
    movq %xmm2, 24(%rsp)
    movq %xmm3, 32(%rsp)
    movq %xmm4, 40(%rsp)
    movq %xmm5, 48(%rsp)
    movq %xmm6, 56(%rsp)
    movq %xmm7, 64(%rsp)
    movq %xmm8, 72(%rsp)
    movq %xmm9, 80(%rsp)
    movq %xmm10, 88(%rsp)
    movq %xmm11, 96(%rsp)
    movq %xmm12, 104(%rsp)
    movq %xmm13, 112(%rsp)
    movq %xmm14, 120(%rsp)
    movq %xmm15, 128(%rsp)
    // Push ArtMethod* for save everything frame method.
    pushq RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET(%r10)
    CFI_ADJUST_CFA_OFFSET(8)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_EVERYTHING != 15 * 8 + 16 * 8 + 16 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when R15 is already saved.
     */
MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
    PUSH r14
    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
END_MACRO

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
    PUSH r15
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
END_MACRO
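
// Size of the kSaveEverything frame, worked out (illustrative note):
// 15 pushed GPRs (every register but rsp) = 120 bytes, 16 spilled FPRs =
// 128 bytes, 16 bytes for the ArtMethod* slot plus alignment padding, and
// 8 bytes for the caller's return address:
// 120 + 128 + 16 + 8 = 272 bytes = FRAME_SIZE_SAVE_EVERYTHING.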

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
    // Restore FPRs. Method and padding are still on the stack.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm8
    movq 88(%rsp), %xmm9
    movq 96(%rsp), %xmm10
    movq 104(%rsp), %xmm11
    movq 112(%rsp), %xmm12
    movq 120(%rsp), %xmm13
    movq 128(%rsp), %xmm14
    movq 136(%rsp), %xmm15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP rdi
    POP r8
    POP r9
    POP r10
    POP r11
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs.
    addq MACRO_LITERAL(16 + 16 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))

    POP rax
    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
    RESTORE_SAVE_EVERYTHING_FRAME_FRPS

    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))

    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
END_MACRO

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
    // (Thread*) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
    UNREACHABLE
END_MACRO

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO2(TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME        // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call CALLVAR(cxx_name)             // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name)
END_MACRO

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
DEFINE_FUNCTION_CUSTOM_CFA art_quick_throw_null_pointer_exception_from_signal, 2 * __SIZEOF_POINTER__
    // Fault address and return address were saved by the fault handler.
    // Save all registers as basis for long jump context; R15 will replace fault address later.
    SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
    // Retrieve fault address and save R15.
    movq (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp), %rdi
    movq %r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)(%rsp)
    CFI_REL_OFFSET(%r15, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__))
    // Outgoing argument set up; RDI already contains the fault address.
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call SYMBOL(artThrowNullPointerExceptionFromSignal)  // (addr, self)
    UNREACHABLE
END_FUNCTION art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code, saves callee saves and then calls artDeliverExceptionFromCode,
     * which will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/rsi.
     *
     * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
     * of the target Method* in rax and method->code_ in rdx.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the return
     * location on the stack.
     *
     * Adapted from x86 code.
     */
MACRO1(INVOKE_TRAMPOLINE_BODY, cxx_name)
    SETUP_SAVE_REFS_AND_ARGS_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, this_object, Thread*, SP)

    movq %gs:THREAD_SELF_OFFSET, %rdx                      // pass Thread
    movq %rsp, %rcx                                        // pass SP

    call CALLVAR(cxx_name)                                 // cxx_name(arg1, arg2, Thread*, SP)
                                                           // rdi := method*, rax := code pointer
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_SAVE_REFS_AND_ARGS_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
END_MACRO
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    INVOKE_TRAMPOLINE_BODY RAW_VAR(cxx_name)
    END_FUNCTION VAR(c_name)
END_MACRO
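
// The rax/rdx pair above is the SysV x86-64 convention for returning a
// 16-byte struct by value. A minimal C sketch of the idea (hypothetical
// names; the real helpers are declared in the runtime's C++ sources):
//
//   typedef struct {
//       void* method;  // returned in rax; null if lookup failed
//       void* code;    // returned in rdx; entry point to tail-call
//   } TwoWordReturn;
//
//   TwoWordReturn artInvokeCommon(uint32_t method_idx, void* this_object,
//                                 Thread* self, void** sp);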

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck


    /*
     * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    // Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
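
// A rough C sketch of what the shorty-scanning macros implement (illustrative
// only). The shorty encodes the signature compactly, e.g. "IJD" = returns
// int, takes (long, double); each macro expansion claims one register and
// falls through to the next:
//
//   const char* s = shorty + 1;          // skip the return-type character
//   const uint32_t* arg = arg_array;
//   while (*s != '\0') {
//       switch (*s++) {
//           case 'D': load_next_xmm(*(const double*)arg);  arg += 2; break;
//           case 'F': load_next_xmm(*(const float*)arg);   arg += 1; break;
//           case 'J': load_next_gpr(*(const int64_t*)arg); arg += 2; break;
//           default:  load_next_gpr(*(const int32_t*)arg); arg += 1; break;
//       }
//   }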

    /*
     * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
     * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
     * the end of the shorty.
     */
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array that must at least contain the this pointer.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; i.e. skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; i.e. skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.
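
    // Worked example of the sizing above (illustrative note): for a 32-byte
    // arg array, 32 + 100 = 132 is aligned down to 128; subtracting the 72
    // bytes already used by the return address and the 8 saved GPRs leaves 56
    // bytes reserved here for the null method* slot and the copied arguments,
    // and keeps rsp 16-byte aligned at the call below.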

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; i.e. skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub

    /*
     * Quick invocation stub.
     * On entry:
     *   [sp] = return address
     *   rdi = method pointer
     *   rsi = argument array or null if no arguments.
     *   rdx = size of argument array in bytes
     *   rcx = (managed) thread pointer
     *   r8 = JValue* result
     *   r9 = char* shorty
     */
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; i.e. skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; i.e. skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming it's a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub

    /*
     * Long jump stub.
     * On entry:
     *   rdi = gprs
     *   rsi = fprs
     */
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
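
// Illustrative note on the long jump: rdi points at an array of saved GPR
// values ordered r15 down to rax, with a placeholder slot for rsp (skipped
// above) and the new stack pointer stored last. Aiming rsp at the array turns
// every reload into a plain pop; the final popq %rsp switches to the target
// stack, whose top word holds the rip that ret then pops.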

MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME           // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call CALLVAR(cxx_name)               // cxx_name(arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         // restore frame up to return address
    CALL_MACRO(return_macro)             // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME          // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rsi   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_REFS_ONLY_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call CALLVAR(cxx_name)              // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME        // restore frame up to return address
    CALL_MACRO(return_macro)            // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO

// Macro for string and type resolution and initialization.
MACRO2(ONE_ARG_SAVE_EVERYTHING_DOWNCALL, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    SETUP_SAVE_EVERYTHING_FRAME                   // save everything for GC
    // Outgoing argument set up
    movl %eax, %edi                               // pass string or type index
    movq %gs:THREAD_SELF_OFFSET, %rsi             // pass Thread::Current()
    call CALLVAR(cxx_name)                        // cxx_name(arg0, Thread*)
    testl %eax, %eax                              // If result is null, deliver the OOME.
    jz 1f
    CFI_REMEMBER_STATE
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX        // restore frame up to return address
    ret
    CFI_RESTORE_STATE
    CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)  // workaround for clang bug: 31975598
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
    END_FUNCTION VAR(c_name)
END_MACRO

MACRO0(RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS

// Comment out allocators that have x86_64 specific asm.
// Region TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
// Normal TLAB:
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)


// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
MACRO2(ART_QUICK_ALLOC_OBJECT_ROSALLOC, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name)
    // Fast path rosalloc allocation.
    // RDI: mirror::Class*, RAX: return value
    // RSI, RDX, RCX, R8, R9: free.
                                                           // Check if the thread local
                                                           // allocation stack has room.
    movq   %gs:THREAD_SELF_OFFSET, %r8                     // r8 = thread
    movq   THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8), %rcx  // rcx = alloc stack top.
    cmpq   THREAD_LOCAL_ALLOC_STACK_END_OFFSET(%r8), %rcx
    jae    .Lslow_path\c_name
                                                           // Load the object size
    movl   MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %eax
                                                           // Check if the size is for a thread
                                                           // local allocation. Also does the
                                                           // initialized and finalizable checks.
    cmpl   LITERAL(ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE), %eax
    ja     .Lslow_path\c_name
                                                           // Compute the rosalloc bracket index
                                                           // from the size.
    shrq   LITERAL(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT), %rax
                                                           // Load the rosalloc run (r9)
                                                           // Subtract __SIZEOF_POINTER__ to
                                                           // subtract one from the index in rax
                                                           // as there is no 0 byte run and the
                                                           // size is already aligned.
    movq   (THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)(%r8, %rax, __SIZEOF_POINTER__), %r9
                                                           // Load the free list head (rax). This
                                                           // will be the return val.
    movq   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9), %rax
    testq  %rax, %rax
    jz     .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber rdi and rsi.
                                                           // Push the new object onto the thread
                                                           // local allocation stack and
                                                           // increment the thread local
                                                           // allocation stack top.
    movl   %eax, (%rcx)
    addq   LITERAL(COMPRESSED_REFERENCE_SIZE), %rcx
    movq   %rcx, THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET(%r8)
                                                           // Load the next pointer of the head
                                                           // and update the list head with the
                                                           // next pointer.
    movq   ROSALLOC_SLOT_NEXT_OFFSET(%rax), %rcx
    movq   %rcx, (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)(%r9)
                                                           // Store the class pointer in the
                                                           // header. This also overwrites the
                                                           // next pointer. The offsets are
                                                           // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF edi
    movl   %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
                                                           // Decrement the size of the free list
    decl   (ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)(%r9)
                                                           // No fence necessary for x86.
    ret
.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
    call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
    END_FUNCTION VAR(c_name)
END_MACRO
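
// A compact C sketch of the rosalloc fast path above (illustrative only;
// the field names are stand-ins for the real runtime offsets):
//
//   Object* rosalloc_fast_alloc(Thread* self, Class* klass) {
//       if (self->alloc_stack_top >= self->alloc_stack_end) return slow_path();
//       uint32_t size = klass->object_size_alloc_fast_path;
//       if (size > ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE) return slow_path();
//       Run* run = self->rosalloc_runs[(size >> QUANTUM_SHIFT) - 1];
//       Slot* slot = run->free_list.head;
//       if (slot == NULL) return slow_path();
//       *self->alloc_stack_top++ = (uint32_t)(uintptr_t)slot;  // compressed ref
//       run->free_list.head = slot->next;
//       ((Object*)slot)->klass = klass;  // overwrites slot->next (same offset)
//       run->free_list.size--;
//       return (Object*)slot;            // no fence needed on x86
//   }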

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc

// The common fast path code for art_quick_alloc_object_resolved_region_tlab.
// TODO: delete ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH since it is the same as
// ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH, slowPathLabel)
    ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH(RAW_VAR(slowPathLabel))
END_MACRO

// The fast path code for art_quick_alloc_object_initialized_region_tlab.
//
// RDI: the class, RAX: return value.
// RCX, RSI, RDX: scratch, r8: Thread::Current().
MACRO1(ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH, slowPathLabel)
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movl MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET(%rdi), %ecx // Load the object size.
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax
    addq %rax, %rcx                                            // Add size to pos. The 64-bit add
                                                               // cannot wrap, so an oversized
                                                               // size simply ends up past the
                                                               // end of the thread local region
                                                               // and fails the check below.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits.
    ja   RAW_VAR(slowPathLabel)
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
    incq THREAD_LOCAL_OBJECTS_OFFSET(%r8)                      // Increase thread_local_objects.
                                                               // Store the class pointer in the
                                                               // header.
                                                               // No fence needed for x86.
    POISON_HEAP_REF edi
    movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded.
END_MACRO
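
// A C sketch of the TLAB bump-pointer fast path above (illustrative only;
// the field names are stand-ins for the real Thread offsets):
//
//   Object* tlab_fast_alloc(Thread* self, Class* klass) {
//       size_t size = klass->object_size_alloc_fast_path;
//       uint8_t* obj = self->thread_local_pos;
//       if (obj + size > self->thread_local_end) return slow_path();
//       self->thread_local_pos = obj + size;
//       self->thread_local_objects++;
//       ((Object*)obj)->klass = klass;  // no fence needed on x86
//       return (Object*)obj;
//   }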
   1133 
   1134 // The fast path code for art_quick_alloc_array_region_tlab.
   1135 // Inputs: RDI: the class, RSI: int32_t component_count, R9: total_size
   1136 // Free temps: RCX, RDX, R8
   1137 // Output: RAX: return value.
   1138 MACRO1(ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE, slowPathLabel)
   1139     movq %gs:THREAD_SELF_OFFSET, %rcx                          // rcx = thread
   1140     // Mask out the unaligned part to make sure we are 8 byte aligned.
   1141     andq LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED64), %r9
   1142     movq THREAD_LOCAL_POS_OFFSET(%rcx), %rax
   1143     addq %rax, %r9
   1144     cmpq THREAD_LOCAL_END_OFFSET(%rcx), %r9                    // Check if it fits.
   1145     ja   RAW_VAR(slowPathLabel)
   1146     movq %r9, THREAD_LOCAL_POS_OFFSET(%rcx)                    // Update thread_local_pos.
   1147     addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%rcx)         // Increase thread_local_objects.
   1148                                                                // Store the class pointer in the
   1149                                                                // header.
   1150                                                                // No fence needed for x86.
   1151     POISON_HEAP_REF edi
   1152     movl %edi, MIRROR_OBJECT_CLASS_OFFSET(%rax)
   1153     movl %esi, MIRROR_ARRAY_LENGTH_OFFSET(%rax)
   1154     ret                                                        // Fast path succeeded.
   1155 END_MACRO
   1156 
   1157 // The common slow path code for art_quick_alloc_object_{resolved, initialized}_tlab
   1158 // and art_quick_alloc_object_{resolved, initialized}_region_tlab.
   1159 MACRO1(ALLOC_OBJECT_TLAB_SLOW_PATH, cxx_name)
   1160     SETUP_SAVE_REFS_ONLY_FRAME                             // save ref containing registers for GC
   1161     // Outgoing argument set up
   1162     movq %gs:THREAD_SELF_OFFSET, %rsi                      // pass Thread::Current()
   1163     call CALLVAR(cxx_name)                                 // cxx_name(arg0, Thread*)
   1164     RESTORE_SAVE_REFS_ONLY_FRAME                           // restore frame up to return address
   1165     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                // return or deliver exception
   1166 END_MACRO
   1167 
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB). May be
// called when the CC (concurrent copying) collector is enabled but not active.
   1170 DEFINE_FUNCTION art_quick_alloc_object_resolved_tlab
   1171     // RDI: mirror::Class* klass
   1172     // RDX, RSI, RCX, R8, R9: free. RAX: return val.
   1173     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_tlab_slow_path
   1174 .Lart_quick_alloc_object_resolved_tlab_slow_path:
   1175     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedTLAB
   1176 END_FUNCTION art_quick_alloc_object_resolved_tlab
   1177 
// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB).
// May be called when the CC (concurrent copying) collector is enabled but not active.
   1180 DEFINE_FUNCTION art_quick_alloc_object_initialized_tlab
   1181     // RDI: mirror::Class* klass
   1182     // RDX, RSI, RCX, R8, R9: free. RAX: return val.
   1183     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_tlab_slow_path
   1184 .Lart_quick_alloc_object_initialized_tlab_slow_path:
   1185     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedTLAB
   1186 END_FUNCTION art_quick_alloc_object_initialized_tlab
   1187 
   1188 MACRO0(COMPUTE_ARRAY_SIZE_UNKNOWN)
   1189     movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rdi), %ecx        // Load component type.
   1190     UNPOISON_HEAP_REF ecx
   1191     movl MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET(%rcx), %ecx // Load primitive type.
   1192     shrq MACRO_LITERAL(PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT), %rcx        // Get component size shift.
   1193     movq %rsi, %r9
   1194     salq %cl, %r9                                              // Calculate array count shifted.
   1195     // Add array header + alignment rounding.
   1196     addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
   1197     // Add 4 extra bytes if we are doing a long array.
   1198     addq MACRO_LITERAL(1), %rcx
   1199     andq MACRO_LITERAL(4), %rcx
   1200 #if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
   1201 #error Long array data offset must be 4 greater than int array data offset.
   1202 #endif
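    // rcx == (shift + 1) & 4, which is 4 only when shift == 3 (64-bit components).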
   1203     addq %rcx, %r9
   1204 END_MACRO
   1205 
   1206 MACRO0(COMPUTE_ARRAY_SIZE_8)
   1207     // RDI: mirror::Class* klass, RSI: int32_t component_count
   1208     // RDX, RCX, R8, R9: free. RAX: return val.
   1209     movq %rsi, %r9
   1210     // Add array header + alignment rounding.
   1211     addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
   1212 END_MACRO
   1213 
   1214 MACRO0(COMPUTE_ARRAY_SIZE_16)
   1215     // RDI: mirror::Class* klass, RSI: int32_t component_count
   1216     // RDX, RCX, R8, R9: free. RAX: return val.
   1217     movq %rsi, %r9
   1218     salq MACRO_LITERAL(1), %r9
   1219     // Add array header + alignment rounding.
   1220     addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
   1221 END_MACRO
   1222 
   1223 MACRO0(COMPUTE_ARRAY_SIZE_32)
   1224     // RDI: mirror::Class* klass, RSI: int32_t component_count
   1225     // RDX, RCX, R8, R9: free. RAX: return val.
   1226     movq %rsi, %r9
   1227     salq MACRO_LITERAL(2), %r9
   1228     // Add array header + alignment rounding.
   1229     addq MACRO_LITERAL(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
   1230 END_MACRO
   1231 
   1232 MACRO0(COMPUTE_ARRAY_SIZE_64)
   1233     // RDI: mirror::Class* klass, RSI: int32_t component_count
   1234     // RDX, RCX, R8, R9: free. RAX: return val.
   1235     movq %rsi, %r9
   1236     salq MACRO_LITERAL(3), %r9
   1237     // Add array header + alignment rounding.
   1238     addq MACRO_LITERAL(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK), %r9
   1239 END_MACRO
   1240 
   1241 MACRO3(GENERATE_ALLOC_ARRAY_TLAB, c_entrypoint, cxx_name, size_setup)
   1242     DEFINE_FUNCTION VAR(c_entrypoint)
   1243     // RDI: mirror::Class* klass, RSI: int32_t component_count
   1244     // RDX, RCX, R8, R9: free. RAX: return val.
   1245     CALL_MACRO(size_setup)
   1246     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\c_entrypoint
   1247 .Lslow_path\c_entrypoint:
   1248     SETUP_SAVE_REFS_ONLY_FRAME                                 // save ref containing registers for GC
   1249     // Outgoing argument set up
   1250     movq %gs:THREAD_SELF_OFFSET, %rdx                          // pass Thread::Current()
   1251     call CALLVAR(cxx_name)                                     // cxx_name(arg0, arg1, Thread*)
   1252     RESTORE_SAVE_REFS_ONLY_FRAME                               // restore frame up to return address
   1253     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER                    // return or deliver exception
   1254     END_FUNCTION VAR(c_entrypoint)
   1255 END_MACRO
   1256 
   1257 
   1258 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1259 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
   1260 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
   1261 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
   1262 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
   1263 
   1264 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1265 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
   1266 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
   1267 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
   1268 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
   1269 
   1270 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB).
   1271 DEFINE_FUNCTION art_quick_alloc_object_resolved_region_tlab
   1272     // Fast path region tlab allocation.
   1273     // RDI: mirror::Class* klass
   1274     // RDX, RSI, RCX, R8, R9: free. RAX: return val.
   1275     ASSERT_USE_READ_BARRIER
   1276     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lart_quick_alloc_object_resolved_region_tlab_slow_path
   1277 .Lart_quick_alloc_object_resolved_region_tlab_slow_path:
   1278     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeResolvedRegionTLAB
   1279 END_FUNCTION art_quick_alloc_object_resolved_region_tlab
   1280 
   1281 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB).
   1282 DEFINE_FUNCTION art_quick_alloc_object_initialized_region_tlab
   1283     // Fast path region tlab allocation.
   1284     // RDI: mirror::Class* klass
   1285     // RDX, RSI, RCX, R8, R9: free. RAX: return val.
   1286     ASSERT_USE_READ_BARRIER
   1287     // No read barrier since the caller is responsible for that.
   1288     ALLOC_OBJECT_INITIALIZED_TLAB_FAST_PATH .Lart_quick_alloc_object_initialized_region_tlab_slow_path
   1289 .Lart_quick_alloc_object_initialized_region_tlab_slow_path:
   1290     ALLOC_OBJECT_TLAB_SLOW_PATH artAllocObjectFromCodeInitializedRegionTLAB
   1291 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
   1292 
   1293 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
   1294 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
   1295 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
   1296 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
   1297 
   1298 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
   1299 
   1300 DEFINE_FUNCTION art_quick_lock_object
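    // Fast path: acquire an unlocked thin lock, or bump the recursion count of a
    // thin lock this thread already holds, using lock cmpxchg to preserve the read
    // barrier bits; null objects, inflated lock words, other owners and count
    // overflow all take the runtime slow path.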
   1301     testl %edi, %edi                      // Null check object/rdi.
   1302     jz   .Lslow_lock
   1303 .Lretry_lock:
   1304     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
   1305     test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits is set.
   1307     movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
   1308     andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
   1309     test %ecx, %ecx
   1310     jnz  .Lalready_thin                   // Lock word contains a thin lock.
   1311     // unlocked case - edx: original lock word, edi: obj.
   1312     movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
   1313     movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
   1314     or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
   1315     lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
    jnz  .Lretry_lock                     // cmpxchg failed; retry.
   1317     ret
   1318 .Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
   1319     movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
   1320     cmpw %cx, %dx                         // do we hold the lock already?
   1321     jne  .Lslow_lock
   1322     movl %edx, %ecx                       // copy the lock word to check count overflow.
   1323     andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %ecx  // zero the gc bits.
   1324     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
   1325     test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if the upper bit (28) is set
   1326     jne  .Lslow_lock                      // count overflowed so go slow
   1327     movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
   1328     addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
   1329     // update lockword, cmpxchg necessary for read barrier bits.
   1330     lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed; retry.
   1332     ret
   1333 .Lslow_lock:
   1334     SETUP_SAVE_REFS_ONLY_FRAME
   1335     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
   1336     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
   1337     RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
   1338     RETURN_IF_EAX_ZERO
   1339 END_FUNCTION art_quick_lock_object
   1340 
   1341 DEFINE_FUNCTION art_quick_lock_object_no_inline
   1342     SETUP_SAVE_REFS_ONLY_FRAME
   1343     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
   1344     call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
   1345     RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
   1346     RETURN_IF_EAX_ZERO
   1347 END_FUNCTION art_quick_lock_object_no_inline
   1348 
   1349 DEFINE_FUNCTION art_quick_unlock_object
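    // Fast path: release a thin lock held by this thread, either decrementing the
    // recursion count or storing an unlocked word that keeps the gc state bits;
    // null objects, inflated lock words and other owners take the slow path.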
   1350     testl %edi, %edi                      // null check object/edi
   1351     jz   .Lslow_unlock
   1352 .Lretry_unlock:
   1353     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
   1354     movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
   1355     test LITERAL(LOCK_WORD_STATE_MASK), %ecx
   1356     jnz  .Lslow_unlock                    // lock word contains a monitor
   1357     cmpw %cx, %dx                         // does the thread id match?
   1358     jne  .Lslow_unlock
   1359     movl %ecx, %edx                       // copy the lock word to detect new count of 0.
   1360     andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED), %edx  // zero the gc bits.
   1361     cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
   1362     jae  .Lrecursive_thin_unlock
   1363     // update lockword, cmpxchg necessary for read barrier bits.
   1364     movl %ecx, %eax                       // eax: old lock word.
   1365     andl LITERAL(LOCK_WORD_GC_STATE_MASK_SHIFTED), %ecx  // ecx: new lock word zero except original gc bits.
   1366 #ifndef USE_READ_BARRIER
   1367     movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
   1368 #else
   1369     lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed; retry.
   1371 #endif
   1372     ret
   1373 .Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
   1374     // update lockword, cmpxchg necessary for read barrier bits.
   1375     movl %ecx, %eax                       // eax: old lock word.
   1376     subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
   1377 #ifndef USE_READ_BARRIER
   1378     mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
   1379 #else
   1380     lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed; retry.
   1382 #endif
   1383     ret
   1384 .Lslow_unlock:
   1385     SETUP_SAVE_REFS_ONLY_FRAME
   1386     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
   1387     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
   1388     RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
   1389     RETURN_IF_EAX_ZERO
   1390 END_FUNCTION art_quick_unlock_object
   1391 
   1392 DEFINE_FUNCTION art_quick_unlock_object_no_inline
   1393     SETUP_SAVE_REFS_ONLY_FRAME
   1394     movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
   1395     call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
   1396     RESTORE_SAVE_REFS_ONLY_FRAME          // restore frame up to return address
   1397     RETURN_IF_EAX_ZERO
   1398 END_FUNCTION art_quick_unlock_object_no_inline
   1399 
   1400 DEFINE_FUNCTION art_quick_check_instance_of
    PUSH rdi                          // Save args in case an exception is thrown.
   1402     PUSH rdi                          // Save args for exc
   1403     PUSH rsi
   1404     subq LITERAL(8), %rsp             // Alignment padding.
   1405     CFI_ADJUST_CFA_OFFSET(8)
   1406     SETUP_FP_CALLEE_SAVE_FRAME
   1407     call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
   1408     testq %rax, %rax
   1409     jz 1f                             // jump forward if not assignable
   1410     RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(24), %rsp            // Pop the two saved args and the alignment padding.
   1412     CFI_ADJUST_CFA_OFFSET(-24)
   1413 
   1414 .Lreturn:
   1415     ret
   1416 
   1417     CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
   1418 1:
   1419     RESTORE_FP_CALLEE_SAVE_FRAME
   1420     addq LITERAL(8), %rsp             // pop padding
   1421     CFI_ADJUST_CFA_OFFSET(-8)
   1422     POP rsi                           // Pop arguments
   1423     POP rdi
   1424     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
   1425     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
   1426     call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
   1427     UNREACHABLE
   1428 END_FUNCTION art_quick_check_instance_of
   1429 
   1430 
   1431 // Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
   1432 MACRO2(POP_REG_NE, reg, exclude_reg)
   1433     .ifc RAW_VAR(reg), RAW_VAR(exclude_reg)
   1434       addq MACRO_LITERAL(8), %rsp
   1435       CFI_ADJUST_CFA_OFFSET(-8)
   1436     .else
   1437       POP RAW_VAR(reg)
   1438     .endif
   1439 END_MACRO
   1440 
   1441     /*
   1442      * Macro to insert read barrier, used in art_quick_aput_obj.
   1443      * obj_reg and dest_reg{32|64} are registers, offset is a defined literal such as
   1444      * MIRROR_OBJECT_CLASS_OFFSET. dest_reg needs two versions to handle the mismatch between
   1445      * 64b PUSH/POP and 32b argument.
   1446      * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
   1447      *
     * As in the art_quick_aput_obj function, the 64b versions are in comments.
   1449      */
   1450 MACRO4(READ_BARRIER, obj_reg, offset, dest_reg32, dest_reg64)
   1451 #ifdef USE_READ_BARRIER
   1452     PUSH rax                            // save registers that might be used
   1453     PUSH rdi
   1454     PUSH rsi
   1455     PUSH rdx
   1456     PUSH rcx
   1457     SETUP_FP_CALLEE_SAVE_FRAME
   1458     // Outgoing argument set up
   1459     // movl REG_VAR(ref_reg32), %edi    // pass ref, no-op for now since parameter ref is unused
   1460     // // movq REG_VAR(ref_reg64), %rdi
   1461     movl REG_VAR(obj_reg), %esi         // pass obj_reg
   1462     // movq REG_VAR(obj_reg), %rsi
   1463     movl MACRO_LITERAL((RAW_VAR(offset))), %edx // pass offset, double parentheses are necessary
   1464     // movq MACRO_LITERAL((RAW_VAR(offset))), %rdx
   1465     call SYMBOL(artReadBarrierSlow)     // artReadBarrierSlow(ref, obj_reg, offset)
   1466     // No need to unpoison return value in rax, artReadBarrierSlow() would do the unpoisoning.
   1467     .ifnc RAW_VAR(dest_reg32), eax
   1468     // .ifnc RAW_VAR(dest_reg64), rax
   1469       movl %eax, REG_VAR(dest_reg32)    // save loaded ref in dest_reg
   1470       // movq %rax, REG_VAR(dest_reg64)
   1471     .endif
   1472     RESTORE_FP_CALLEE_SAVE_FRAME
   1473     POP_REG_NE rcx, RAW_VAR(dest_reg64) // Restore registers except dest_reg
   1474     POP_REG_NE rdx, RAW_VAR(dest_reg64)
   1475     POP_REG_NE rsi, RAW_VAR(dest_reg64)
   1476     POP_REG_NE rdi, RAW_VAR(dest_reg64)
   1477     POP_REG_NE rax, RAW_VAR(dest_reg64)
   1478 #else
   1479     movl RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg32)
   1480     // movq RAW_VAR(offset)(REG_VAR(obj_reg)), REG_VAR(dest_reg64)
   1481     UNPOISON_HEAP_REF RAW_VAR(dest_reg32) // UNPOISON_HEAP_REF only takes a 32b register
   1482 #endif  // USE_READ_BARRIER
   1483 END_MACRO
   1484 
   1485 DEFINE_FUNCTION art_quick_aput_obj
   1486     testl %edx, %edx                // store of null
   1487 //  test %rdx, %rdx
   1488     jz .Ldo_aput_null
   1489     READ_BARRIER edi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
   1490     // READ_BARRIER rdi, MIRROR_OBJECT_CLASS_OFFSET, ecx, rcx
   1491     READ_BARRIER ecx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
   1492     // READ_BARRIER rcx, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, ecx, rcx
   1493 #if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
   1494     READ_BARRIER edx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax  // rax is free.
   1495     // READ_BARRIER rdx, MIRROR_OBJECT_CLASS_OFFSET, eax, rax
   1496     cmpl %eax, %ecx  // value's type == array's component type - trivial assignability
   1497 #else
   1498     cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
   1500 #endif
   1501     jne .Lcheck_assignability
   1502 .Ldo_aput:
   1503     POISON_HEAP_REF edx
   1504     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
   1505 //  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
   1506     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
   1507     shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
   1508 //  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
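    // Mark the card for the target object: the card table base is biased so that
    // its low byte equals the dirty-card value, so storing %dl dirties the card.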
   1509     movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
   1510     ret
   1511 .Ldo_aput_null:
   1512     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
   1513 //  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
   1514     ret
   1515 .Lcheck_assignability:
   1516     // Save arguments.
   1517     PUSH rdi
   1518     PUSH rsi
   1519     PUSH rdx
   1520     SETUP_FP_CALLEE_SAVE_FRAME
   1521 
   1522 #if defined(USE_HEAP_POISONING) || defined(USE_READ_BARRIER)
   1523     // The load of MIRROR_OBJECT_CLASS_OFFSET(%edx) is redundant, eax still holds the value.
   1524     movl %eax, %esi               // Pass arg2 = value's class.
   1525     // movq %rax, %rsi
   1526 #else
   1527                                      // "Uncompress" = do nothing, as already zero-extended on load.
   1528     movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi  // Pass arg2 = value's class.
   1529 #endif
   1530     movq %rcx, %rdi               // Pass arg1 = array's component type.
   1531 
   1532     call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)
   1533 
   1534     // Exception?
   1535     testq %rax, %rax
   1536     jz   .Lthrow_array_store_exception
   1537 
   1538     RESTORE_FP_CALLEE_SAVE_FRAME
   1539     // Restore arguments.
   1540     POP  rdx
   1541     POP  rsi
   1542     POP  rdi
   1543 
   1544     POISON_HEAP_REF edx
   1545     movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
   1546 //  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
   1547     movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
   1548     shrl LITERAL(CARD_TABLE_CARD_SHIFT), %edi
   1549 //  shrl LITERAL(CARD_TABLE_CARD_SHIFT), %rdi
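    // Mark the card as above: the low byte of the biased card table base is the
    // dirty-card value.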
   1550     movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
   1552     ret
   1553     CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
   1554 .Lthrow_array_store_exception:
   1555     RESTORE_FP_CALLEE_SAVE_FRAME
   1556     // Restore arguments.
   1557     POP  rdx
   1558     POP  rsi
   1559     POP  rdi
   1560 
   1561     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // Save all registers as basis for long jump context.
   1562 
   1563     // Outgoing argument set up.
   1564     movq %rdx, %rsi                         // Pass arg 2 = value.
   1565     movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
   1566                                             // Pass arg 1 = array.
   1567     call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
   1568     UNREACHABLE
   1569 END_FUNCTION art_quick_aput_obj
   1570 
   1571 // TODO: This is quite silly on X86_64 now.
   1572 DEFINE_FUNCTION art_quick_memcpy
   1573     call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
   1574     ret
   1575 END_FUNCTION art_quick_memcpy
   1576 
   1577 DEFINE_FUNCTION art_quick_test_suspend
   1578     SETUP_SAVE_EVERYTHING_FRAME                 // save everything for GC
   1579     // Outgoing argument set up
   1580     movq %gs:THREAD_SELF_OFFSET, %rdi           // pass Thread::Current()
   1581     call SYMBOL(artTestSuspendFromCode)         // (Thread*)
   1582     RESTORE_SAVE_EVERYTHING_FRAME               // restore frame up to return address
   1583     ret
   1584 END_FUNCTION art_quick_test_suspend
   1585 
   1586 UNIMPLEMENTED art_quick_ldiv
   1587 UNIMPLEMENTED art_quick_lmod
   1588 UNIMPLEMENTED art_quick_lmul
   1589 UNIMPLEMENTED art_quick_lshl
   1590 UNIMPLEMENTED art_quick_lshr
   1591 UNIMPLEMENTED art_quick_lushr
   1592 
   1593 THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
   1594 THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
   1595 THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
   1596 THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_EAX_ZERO
   1597 THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_EAX_ZERO
   1598 
   1599 TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1600 TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1601 TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1602 TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1603 TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1604 TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1605 TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1606 
   1607 TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_EAX_ZERO
   1608 TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_EAX_ZERO
   1609 TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_EAX_ZERO
   1610 TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1611 TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_EAX_ZERO
   1612 
   1613 ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1614 ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1615 ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1616 ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1617 ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1618 ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1619 ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1620 
   1621 DEFINE_FUNCTION art_quick_proxy_invoke_handler
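    // On entry, RDI holds the proxy ArtMethod* and RSI the receiver; they are
    // forwarded unchanged as the first two arguments of the C call below.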
   1622     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
   1623 
   1624     movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
   1625     movq %rsp, %rcx                         // Pass SP.
   1626     call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
   1627     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1628     movq %rax, %xmm0                        // Copy return value in case of float returns.
   1629     RETURN_OR_DELIVER_PENDING_EXCEPTION
   1630 END_FUNCTION art_quick_proxy_invoke_handler
   1631 
   1632     /*
   1633      * Called to resolve an imt conflict.
   1634      * rdi is the conflict ArtMethod.
   1635      * rax is a hidden argument that holds the target interface method's dex method index.
   1636      *
   1637      * Note that this stub writes to r10 and rdi.
   1638      */
   1639 DEFINE_FUNCTION art_quick_imt_conflict_trampoline
   1640 #if defined(__APPLE__)
   1641     int3
   1642     int3
   1643 #else
   1644     movq __SIZEOF_POINTER__(%rsp), %r10 // Load referrer.
   1645     movq ART_METHOD_DEX_CACHE_METHODS_OFFSET_64(%r10), %r10   // Load dex cache methods array.
   1646     mov %eax, %r11d  // Remember method index in R11.
   1647     andl LITERAL(METHOD_DEX_CACHE_SIZE_MINUS_ONE), %eax  // Calculate DexCache method slot index.
   1648     shll LITERAL(1), %eax       // Multiply by 2 as entries have size 2 * __SIZEOF_POINTER__.
   1649     leaq 0(%r10, %rax, __SIZEOF_POINTER__), %r10 // Load DexCache method slot address.
   1650     PUSH rdx                    // Preserve RDX as we need to clobber it by LOCK CMPXCHG16B.
   1651     mov %rcx, %rdx              // Make RDX:RAX == RCX:RBX so that LOCK CMPXCHG16B makes no changes.
   1652     mov %rbx, %rax              // (The actual value does not matter.)
   1653     lock cmpxchg16b (%r10)      // Relaxed atomic load RDX:RAX from the dex cache slot.
   1654     movq ART_METHOD_JNI_OFFSET_64(%rdi), %rdi  // Load ImtConflictTable
   1655     cmp %rdx, %r11              // Compare method index to see if we had a DexCache method hit.
   1656     jne .Limt_conflict_trampoline_dex_cache_miss
   1657 .Limt_table_iterate:
   1658     cmpq %rax, 0(%rdi)
   1659     jne .Limt_table_next_entry
   1660     // We successfully hit an entry in the table. Load the target method
   1661     // and jump to it.
   1662     movq __SIZEOF_POINTER__(%rdi), %rdi
   1663     CFI_REMEMBER_STATE
   1664     POP rdx
   1665     jmp *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi)
   1666     CFI_RESTORE_STATE
   1667 .Limt_table_next_entry:
   1668     // If the entry is null, the interface method is not in the ImtConflictTable.
   1669     cmpq LITERAL(0), 0(%rdi)
   1670     jz .Lconflict_trampoline
   1671     // Iterate over the entries of the ImtConflictTable.
   1672     addq LITERAL(2 * __SIZEOF_POINTER__), %rdi
   1673     jmp .Limt_table_iterate
   1674 .Lconflict_trampoline:
   1675     // Call the runtime stub to populate the ImtConflictTable and jump to the
   1676     // resolved method.
   1677     CFI_REMEMBER_STATE
   1678     POP rdx
   1679     movq %rax, %rdi  // Load interface method
   1680     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
   1681     CFI_RESTORE_STATE
   1682 .Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here;
    // artLookupResolvedMethod() is not allowed to walk the stack, so none is needed.
   1685 
   1686     // Save GPR args and ImtConflictTable; RDX is already saved.
   1687     PUSH r9   // Quick arg 5.
   1688     PUSH r8   // Quick arg 4.
   1689     PUSH rsi  // Quick arg 1.
   1690     PUSH rcx  // Quick arg 3.
   1691     PUSH rdi  // ImtConflictTable
   1692     // Save FPR args and callee-saves, align stack to 16B.
   1693     subq MACRO_LITERAL(12 * 8 + 8), %rsp
   1694     CFI_ADJUST_CFA_OFFSET(12 * 8 + 8)
   1695     movq %xmm0, 0(%rsp)
   1696     movq %xmm1, 8(%rsp)
   1697     movq %xmm2, 16(%rsp)
   1698     movq %xmm3, 24(%rsp)
   1699     movq %xmm4, 32(%rsp)
   1700     movq %xmm5, 40(%rsp)
   1701     movq %xmm6, 48(%rsp)
   1702     movq %xmm7, 56(%rsp)
   1703     movq %xmm12, 64(%rsp)  // XMM12-15 are callee-save in ART compiled code ABI
   1704     movq %xmm13, 72(%rsp)  // but caller-save in native ABI.
   1705     movq %xmm14, 80(%rsp)
   1706     movq %xmm15, 88(%rsp)
   1707 
   1708     movq %r11, %rdi             // Pass method index.
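    // The referrer now sits above the FPR area (12*8+8), the six GPR pushes
    // (including the earlier RDX push) and the return PC, i.e. at the original
    // __SIZEOF_POINTER__(%rsp).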
   1709     movq 12 * 8 + 8 + 6 * 8 + 8(%rsp), %rsi   // Pass referrer.
   1710     call SYMBOL(artLookupResolvedMethod)  // (uint32_t method_index, ArtMethod* referrer)
   1711 
   1712     // Restore FPRs.
   1713     movq 0(%rsp), %xmm0
   1714     movq 8(%rsp), %xmm1
   1715     movq 16(%rsp), %xmm2
   1716     movq 24(%rsp), %xmm3
   1717     movq 32(%rsp), %xmm4
   1718     movq 40(%rsp), %xmm5
   1719     movq 48(%rsp), %xmm6
   1720     movq 56(%rsp), %xmm7
   1721     movq 64(%rsp), %xmm12
   1722     movq 72(%rsp), %xmm13
   1723     movq 80(%rsp), %xmm14
   1724     movq 88(%rsp), %xmm15
   1725     addq MACRO_LITERAL(12 * 8 + 8), %rsp
   1726     CFI_ADJUST_CFA_OFFSET(-(12 * 8 + 8))
   1727     // Restore ImtConflictTable and GPR args.
   1728     POP rdi
   1729     POP rcx
   1730     POP rsi
   1731     POP r8
   1732     POP r9
   1733 
   1734     cmp LITERAL(0), %rax        // If the method wasn't resolved,
   1735     je .Lconflict_trampoline    //   skip the lookup and go to artInvokeInterfaceTrampoline().
   1736     jmp .Limt_table_iterate
   1737 #endif  // __APPLE__
   1738 END_FUNCTION art_quick_imt_conflict_trampoline
   1739 
   1740 DEFINE_FUNCTION art_quick_resolution_trampoline
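    // Resolve the method being called, then tail-call the resolved code with the
    // arguments restored; a null code pointer means an exception is pending.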
   1741     SETUP_SAVE_REFS_AND_ARGS_FRAME
   1742     movq %gs:THREAD_SELF_OFFSET, %rdx
   1743     movq %rsp, %rcx
   1744     call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
   1745     movq %rax, %r10               // Remember returned code pointer in R10.
   1746     movq (%rsp), %rdi             // Load called method into RDI.
   1747     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1748     testq %r10, %r10              // If code pointer is null goto deliver pending exception.
   1749     jz 1f
   1750     jmp *%r10                     // Tail call into method.
   1751 1:
   1752     DELIVER_PENDING_EXCEPTION
   1753 END_FUNCTION art_quick_resolution_trampoline
   1754 
   1755 /* Generic JNI frame layout:
   1756  *
   1757  * #-------------------#
   1758  * |                   |
   1759  * | caller method...  |
   1760  * #-------------------#    <--- SP on entry
   1761  *
   1762  *          |
   1763  *          V
   1764  *
   1765  * #-------------------#
   1766  * | caller method...  |
   1767  * #-------------------#
   1768  * | Return            |
   1769  * | R15               |    callee save
   1770  * | R14               |    callee save
   1771  * | R13               |    callee save
   1772  * | R12               |    callee save
   1773  * | R9                |    arg5
   1774  * | R8                |    arg4
   1775  * | RSI/R6            |    arg1
   1776  * | RBP/R5            |    callee save
   1777  * | RBX/R3            |    callee save
   1778  * | RDX/R2            |    arg2
   1779  * | RCX/R1            |    arg3
   1780  * | XMM7              |    float arg 8
   1781  * | XMM6              |    float arg 7
   1782  * | XMM5              |    float arg 6
   1783  * | XMM4              |    float arg 5
   1784  * | XMM3              |    float arg 4
   1785  * | XMM2              |    float arg 3
   1786  * | XMM1              |    float arg 2
   1787  * | XMM0              |    float arg 1
   1788  * | RDI/Method*       |  <- sp
   1789  * #-------------------#
   1790  * | Scratch Alloca    |    5K scratch space
   1791  * #---------#---------#
   1792  * |         | sp*     |
   1793  * | Tramp.  #---------#
   1794  * | args    | thread  |
   1795  * | Tramp.  #---------#
   1796  * |         | method  |
   1797  * #-------------------#    <--- SP on artQuickGenericJniTrampoline
   1798  *
   1799  *           |
   1800  *           v              artQuickGenericJniTrampoline
   1801  *
   1802  * #-------------------#
   1803  * | caller method...  |
   1804  * #-------------------#
   1805  * | Return            |
   1806  * | Callee-Save Data  |
   1807  * #-------------------#
   1808  * | handle scope      |
   1809  * #-------------------#
   1810  * | Method*           |    <--- (1)
   1811  * #-------------------#
   1812  * | local ref cookie  | // 4B
   1813  * | handle scope size | // 4B   TODO: roll into call stack alignment?
   1814  * #-------------------#
   1815  * | JNI Call Stack    |
   1816  * #-------------------#    <--- SP on native call
   1817  * |                   |
   1818  * | Stack for Regs    |    The trampoline assembly will pop these values
   1819  * |                   |    into registers for native call
   1820  * #-------------------#
   1821  * | Native code ptr   |
   1822  * #-------------------#
   1823  * | Free scratch      |
   1824  * #-------------------#
   1825  * | Ptr to (1)        |    <--- RSP
   1826  * #-------------------#
   1827  */
   1828     /*
   1829      * Called to do a generic JNI down-call
   1830      */
   1831 DEFINE_FUNCTION art_quick_generic_jni_trampoline
   1832     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_RDI
   1833 
   1834     movq %rsp, %rbp                 // save SP at (old) callee-save frame
   1835     CFI_DEF_CFA_REGISTER(rbp)
   1836 
    //
    // Reserve a generous amount of scratch space:
    //
    //      4    local state ref
    //      4    padding
    //   4196    ~4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields?
    // +  112    14x 8-byte stack-to-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    // Note: 14x8 = 7*16, so the stack stays 16-byte aligned for the native call.
    //       This also means the padding ends up somewhere in the middle.
    //
    // In practice we simply reserve 5K (5120 bytes) and release it afterwards.
    subq LITERAL(5120), %rsp
   1855     // prepare for artQuickGenericJniTrampoline call
   1856     // (Thread*,  SP)
   1857     //    rdi    rsi      <= C calling convention
   1858     //  gs:...   rbp      <= where they are
   1859     movq %gs:THREAD_SELF_OFFSET, %rdi
   1860     movq %rbp, %rsi
   1861     call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)
   1862 
   1863     // The C call will have registered the complete save-frame on success.
   1864     // The result of the call is:
   1865     // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca; the stack can be restored up to there.
   1867 
   1868     // Check for error = 0.
   1869     test %rax, %rax
   1870     jz .Lexception_in_native
   1871 
   1872     // Release part of the alloca.
   1873     movq %rdx, %rsp
   1874 
    // Pop the GPR arguments from the register-passing alloca region, in the
    // layout prepared by artQuickGenericJniTrampoline.
   1877     popq %rdi
   1878     popq %rsi
   1879     popq %rdx
   1880     popq %rcx
   1881     popq %r8
   1882     popq %r9
   1883     // TODO: skip floating point if unused, some flag.
   1884     movq 0(%rsp), %xmm0
   1885     movq 8(%rsp), %xmm1
   1886     movq 16(%rsp), %xmm2
   1887     movq 24(%rsp), %xmm3
   1888     movq 32(%rsp), %xmm4
   1889     movq 40(%rsp), %xmm5
   1890     movq 48(%rsp), %xmm6
   1891     movq 56(%rsp), %xmm7
   1892     addq LITERAL(64), %rsp          // floating-point done
   1893 
   1894     // native call
   1895     call *%rax
   1896 
   1897     // result sign extension is handled in C code
   1898     // prepare for artQuickGenericJniEndTrampoline call
   1899     // (Thread*,  result, result_f)
   1900     //   rdi      rsi   rdx       <= C calling convention
   1901     //  gs:...    rax   xmm0      <= where they are
   1902     movq %gs:THREAD_SELF_OFFSET, %rdi
   1903     movq %rax, %rsi
   1904     movq %xmm0, %rdx
   1905     call SYMBOL(artQuickGenericJniEndTrampoline)
   1906 
   1907     // Pending exceptions possible.
   1908     // TODO: use cmpq, needs direct encoding because of gas bug
   1909     movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
   1910     test %rcx, %rcx
   1911     jnz .Lexception_in_native
   1912 
   1913     // Tear down the alloca.
   1914     movq %rbp, %rsp
   1915     CFI_DEF_CFA_REGISTER(rsp)
   1916 
   1917     // Tear down the callee-save frame.
   1918     // Load FPRs.
    // movq 16(%rsp), %xmm0         // Deliberately skipped: xmm0 holds the FP return value.
    movq 24(%rsp), %xmm1            // Restoring the caller-save argument FPRs is not really needed.
   1921     movq 32(%rsp), %xmm2
   1922     movq 40(%rsp), %xmm3
   1923     movq 48(%rsp), %xmm4
   1924     movq 56(%rsp), %xmm5
   1925     movq 64(%rsp), %xmm6
   1926     movq 72(%rsp), %xmm7
   1927     movq 80(%rsp), %xmm12
   1928     movq 88(%rsp), %xmm13
   1929     movq 96(%rsp), %xmm14
   1930     movq 104(%rsp), %xmm15
    // This area was 80 bytes before the 4 callee-save XMM registers were added.
   1932     addq LITERAL(80 + 4*8), %rsp
   1933     CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Restore callee-save and GPR arg registers, mixed together to agree with core spills bitmap.
   1935     POP rcx  // Arg.
   1936     POP rdx  // Arg.
   1937     POP rbx  // Callee save.
   1938     POP rbp  // Callee save.
   1939     POP rsi  // Arg.
   1940     POP r8   // Arg.
   1941     POP r9   // Arg.
   1942     POP r12  // Callee save.
   1943     POP r13  // Callee save.
   1944     POP r14  // Callee save.
   1945     POP r15  // Callee save.
    // Also store the result into xmm0, for when the return type is floating-point.
   1947     movq %rax, %xmm0
   1948     ret
   1949 .Lexception_in_native:
   1950     movq %gs:THREAD_TOP_QUICK_FRAME_OFFSET, %rsp
   1951     CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a return PC for the save-all frame required by the runtime.
   1953     call .Lexception_call
   1954 .Lexception_call:
   1955     DELIVER_PENDING_EXCEPTION
   1956 END_FUNCTION art_quick_generic_jni_trampoline
   1957 
   1958     /*
   1959      * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
   1960      * of a quick call:
   1961      * RDI = method being called / to bridge to.
   1962      * RSI, RDX, RCX, R8, R9 are arguments to that method.
   1963      */
   1964 DEFINE_FUNCTION art_quick_to_interpreter_bridge
   1965     SETUP_SAVE_REFS_AND_ARGS_FRAME     // Set up frame and save arguments.
   1966     movq %gs:THREAD_SELF_OFFSET, %rsi  // RSI := Thread::Current()
   1967     movq %rsp, %rdx                    // RDX := sp
   1968     call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
   1969     RESTORE_SAVE_REFS_AND_ARGS_FRAME   // TODO: no need to restore arguments in this case.
   1970     movq %rax, %xmm0                   // Place return value also into floating point return value.
   1971     RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
   1972 END_FUNCTION art_quick_to_interpreter_bridge
   1973 
   1974     /*
   1975      * Called to catch an attempt to invoke an obsolete method.
   1976      * RDI = method being called.
   1977      */
   1978 ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
   1979 
   1980     /*
   1981      * Routine that intercepts method calls and returns.
   1982      */
   1983 DEFINE_FUNCTION art_quick_instrumentation_entry
   1984 #if defined(__APPLE__)
   1985     int3
   1986     int3
   1987 #else
   1988     SETUP_SAVE_REFS_AND_ARGS_FRAME
   1989 
   1990     movq %rdi, %r12               // Preserve method pointer in a callee-save.
   1991 
   1992     movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
   1993     movq %rsp, %rcx                     // Pass SP.
   1994 
   1995     call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, SP)
   1996 
   1997                                   // %rax = result of call.
   1998     testq %rax, %rax
   1999     jz 1f
   2000 
   2001     movq %r12, %rdi               // Reload method pointer.
   2002     leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
   2003     movq %r12, FRAME_SIZE_SAVE_REFS_AND_ARGS-8(%rsp)  // exit.
   2004 
   2005     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   2006 
   2007     jmp *%rax                     // Tail call to intended method.
   2008 1:
   2009     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   2010     DELIVER_PENDING_EXCEPTION
   2011 #endif  // __APPLE__
   2012 END_FUNCTION art_quick_instrumentation_entry
   2013 
   2014 DEFINE_FUNCTION_CUSTOM_CFA art_quick_instrumentation_exit, 0
   2015     pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
   2016     CFI_ADJUST_CFA_OFFSET(8)
   2017 
   2018     SETUP_SAVE_REFS_ONLY_FRAME
   2019 
    // We need to save rax and xmm0. We could use a callee-save register saved by SETUP_SAVE_REFS_ONLY_FRAME, but then
   2021     // we would need to fully restore it. As there are a good number of callee-save registers, it
   2022     // seems easier to have an extra small stack area. But this should be revisited.
   2023 
   2024     movq  %rsp, %rsi                          // Pass SP.
   2025 
   2026     PUSH rax                  // Save integer result.
   2027     movq %rsp, %rdx           // Pass integer result pointer.
   2028 
   2029     subq LITERAL(8), %rsp     // Save floating-point result.
   2030     CFI_ADJUST_CFA_OFFSET(8)
   2031     movq %xmm0, (%rsp)
   2032     movq %rsp, %rcx           // Pass floating-point result pointer.
   2033 
   2034     movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
   2035 
   2036     call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res*, fpr_res*)
   2037 
   2038     movq  %rax, %rdi          // Store return PC
   2039     movq  %rdx, %rsi          // Store second return PC in hidden arg.
   2040 
   2041     movq (%rsp), %xmm0        // Restore floating-point result.
   2042     addq LITERAL(8), %rsp
   2043     CFI_ADJUST_CFA_OFFSET(-8)
   2044     POP rax                   // Restore integer result.
   2045 
   2046     RESTORE_SAVE_REFS_ONLY_FRAME
   2047 
    testq %rdi, %rdi          // Check if we have a return PC to go to. If we don't,
                              // an exception occurred.
   2050     jz 1f
   2051 
   2052     addq LITERAL(8), %rsp     // Drop fake return pc.
   2053 
   2054     jmp   *%rdi               // Return.
   2055 1:
   2056     DELIVER_PENDING_EXCEPTION
   2057 END_FUNCTION art_quick_instrumentation_exit
   2058 
   2059     /*
   2060      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
   2061      * will long jump to the upcall with a special exception of -1.
   2062      */
   2063 DEFINE_FUNCTION art_quick_deoptimize
    pushq %rsi                         // Entry point for a jump: push a fake return PC
                                       // from the hidden arg so it looks like a call.
   2066     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
   2067                                        // Stack should be aligned now.
   2068     movq %gs:THREAD_SELF_OFFSET, %rdi  // Pass Thread.
   2069     call SYMBOL(artDeoptimize)         // (Thread*)
   2070     UNREACHABLE
   2071 END_FUNCTION art_quick_deoptimize
   2072 
   2073     /*
   2074      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
   2075      * will long jump to the interpreter bridge.
   2076      */
   2077 DEFINE_FUNCTION art_quick_deoptimize_from_compiled_code
   2078     SETUP_SAVE_EVERYTHING_FRAME
   2079                                                 // Stack should be aligned now.
   2080     movq %gs:THREAD_SELF_OFFSET, %rsi           // Pass Thread.
   2081     call SYMBOL(artDeoptimizeFromCompiledCode)  // (DeoptimizationKind, Thread*)
   2082     UNREACHABLE
   2083 END_FUNCTION art_quick_deoptimize_from_compiled_code
   2084 
   2085     /*
   2086      * String's compareTo.
   2087      *
   2088      * On entry:
   2089      *    rdi:   this string object (known non-null)
   2090      *    rsi:   comp string object (known non-null)
   2091      */
   2092 DEFINE_FUNCTION art_quick_string_compareto
   2093     movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
   2094     movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
   2095     /* Build pointers to the start of string data */
   2096     leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
   2097     leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
   2098 #if (STRING_COMPRESSION_FEATURE)
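    /* With string compression, the count field holds (length << 1) | flag, where
     * flag 0 means compressed 8-bit chars and flag 1 means 16-bit chars; shrl
     * recovers the length and shifts the flag into CF for the jnc tests below. */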
    /* Distinguish the compression cases */
   2100     shrl    LITERAL(1), %r8d
   2101     jnc     .Lstring_compareto_this_is_compressed
   2102     shrl    LITERAL(1), %r9d
   2103     jnc     .Lstring_compareto_that_is_compressed
   2104     jmp     .Lstring_compareto_both_not_compressed
   2105 .Lstring_compareto_this_is_compressed:
   2106     shrl    LITERAL(1), %r9d
   2107     jnc     .Lstring_compareto_both_compressed
   2108     /* Comparison this (8-bit) and that (16-bit) */
   2109     mov     %r8d, %eax
   2110     subl    %r9d, %eax
   2111     mov     %r8d, %ecx
   2112     cmovg   %r9d, %ecx
   2113     /* Going into loop to compare each character */
   2114     jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
   2115 .Lstring_compareto_loop_comparison_this_compressed:
   2116     movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
   2117     movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
   2118     addl    LITERAL(1), %edi                    // ++this_cur_char (8-bit)
   2119     addl    LITERAL(2), %esi                    // ++that_cur_char (16-bit)
   2120     subl    %r9d, %r8d
   2121     loope   .Lstring_compareto_loop_comparison_this_compressed
   2122     cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
   2123 .Lstring_compareto_keep_length1:
   2124     ret
   2125 .Lstring_compareto_that_is_compressed:
   2126     movl    %r8d, %eax
   2127     subl    %r9d, %eax
   2128     mov     %r8d, %ecx
   2129     cmovg   %r9d, %ecx
    /* Comparison this (16-bit) and that (8-bit) */
   2131     jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
   2132 .Lstring_compareto_loop_comparison_that_compressed:
    movzbl  (%esi), %r9d                        // move *(that_cur_char) byte to long
   2134     movzbl  (%esi), %r9d                        // move *(that_cur_chat) byte to long
   2135     addl    LITERAL(2), %edi                    // ++this_cur_char (16-bit)
   2136     addl    LITERAL(1), %esi                    // ++that_cur_char (8-bit)
   2137     subl    %r9d, %r8d
   2138     loope   .Lstring_compareto_loop_comparison_that_compressed
   2139     cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
   2140 .Lstring_compareto_keep_length2:
   2141     ret
   2142 .Lstring_compareto_both_compressed:
   2143     /* Calculate min length and count diff */
   2144     movl    %r8d, %ecx
   2145     movl    %r8d, %eax
   2146     subl    %r9d, %eax
   2147     cmovg   %r9d, %ecx
   2148     jecxz   .Lstring_compareto_keep_length3
   2149     repe    cmpsb
   2150     je      .Lstring_compareto_keep_length3
   2151     movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
   2152     movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
   2153     jmp     .Lstring_compareto_count_difference
   2154 #endif // STRING_COMPRESSION_FEATURE
   2155 .Lstring_compareto_both_not_compressed:
   2156     /* Calculate min length and count diff */
   2157     movl    %r8d, %ecx
   2158     movl    %r8d, %eax
   2159     subl    %r9d, %eax
   2160     cmovg   %r9d, %ecx
   2161     /*
   2162      * At this point we have:
   2163      *   eax: value to return if first part of strings are equal
   2164      *   ecx: minimum among the lengths of the two strings
   2165      *   esi: pointer to comp string data
   2166      *   edi: pointer to this string data
   2167      */
   2168     jecxz .Lstring_compareto_keep_length3
   2169     repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
   2170     je    .Lstring_compareto_keep_length3
   2171     movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
   2172     movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
   2173 .Lstring_compareto_count_difference:
   2174     subl  %ecx, %eax              // return the difference
   2175 .Lstring_compareto_keep_length3:
   2176     ret
   2177 END_FUNCTION art_quick_string_compareto
   2178 
   2179 UNIMPLEMENTED art_quick_memcmp16
   2180 
   2181 DEFINE_FUNCTION art_quick_instance_of
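    // On entry, RDI = mirror::Object* obj and RSI = mirror::Class* ref_class are
    // already in place for the artInstanceOfFromCode call below.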
   2182     SETUP_FP_CALLEE_SAVE_FRAME
   2183     subq LITERAL(8), %rsp                      // Alignment padding.
   2184     CFI_ADJUST_CFA_OFFSET(8)
   2185     call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
   2186     addq LITERAL(8), %rsp
   2187     CFI_ADJUST_CFA_OFFSET(-8)
   2188     RESTORE_FP_CALLEE_SAVE_FRAME
   2189     ret
   2190 END_FUNCTION art_quick_instance_of
   2191 
   2192 // Create a function `name` calling the ReadBarrier::Mark routine,
   2193 // getting its argument and returning its result through register
   2194 // `reg`, saving and restoring all caller-save registers.
   2195 //
   2196 // The generated function follows a non-standard runtime calling
   2197 // convention:
   2198 // - register `reg` (which may be different from RDI) is used to pass
   2199 //   the (sole) argument of this function;
   2200 // - register `reg` (which may be different from RAX) is used to return
   2201 //   the result of this function (instead of RAX);
   2202 // - if `reg` is different from `rdi`, RDI is treated like a normal
   2203 //   (non-argument) caller-save register;
   2204 // - if `reg` is different from `rax`, RAX is treated like a normal
   2205 //   (non-result) caller-save register;
   2206 // - everything else is the same as in the standard runtime calling
   2207 //   convention (e.g. standard callee-save registers are preserved).
   2208 MACRO2(READ_BARRIER_MARK_REG, name, reg)
   2209     DEFINE_FUNCTION VAR(name)
   2210     // Null check so that we can load the lock word.
   2211     testq REG_VAR(reg), REG_VAR(reg)
   2212     jz .Lret_rb_\name
   2213 .Lnot_null_\name:
    // Check the mark bit: if it is set, the reference is already marked, so return it as-is.
   2215     testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
   2216     jz .Lslow_rb_\name
   2217     ret
   2218 .Lslow_rb_\name:
   2219     PUSH rax
   2220     movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
   2221     addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
    // Jump if the addl caused eax to overflow (unsigned). The only lock word state
    // that overflows is the forwarding address state.
   2224     // Taken ~25% of the time.
   2225     jnae .Lret_forwarding_address\name
   2226 
   2227     // Save all potentially live caller-save core registers.
   2228     movq 0(%rsp), %rax
   2229     PUSH rcx
   2230     PUSH rdx
   2231     PUSH rsi
   2232     PUSH rdi
   2233     PUSH r8
   2234     PUSH r9
   2235     PUSH r10
   2236     PUSH r11
   2237     // Create space for caller-save floating-point registers.
   2238     subq MACRO_LITERAL(12 * 8), %rsp
   2239     CFI_ADJUST_CFA_OFFSET(12 * 8)
   2240     // Save all potentially live caller-save floating-point registers.
   2241     movq %xmm0, 0(%rsp)
   2242     movq %xmm1, 8(%rsp)
   2243     movq %xmm2, 16(%rsp)
   2244     movq %xmm3, 24(%rsp)
   2245     movq %xmm4, 32(%rsp)
   2246     movq %xmm5, 40(%rsp)
   2247     movq %xmm6, 48(%rsp)
   2248     movq %xmm7, 56(%rsp)
   2249     movq %xmm8, 64(%rsp)
   2250     movq %xmm9, 72(%rsp)
   2251     movq %xmm10, 80(%rsp)
   2252     movq %xmm11, 88(%rsp)
   2253     SETUP_FP_CALLEE_SAVE_FRAME
   2254 
   2255     .ifnc RAW_VAR(reg), rdi
   2256       movq REG_VAR(reg), %rdi       // Pass arg1 - obj from `reg`.
   2257     .endif
   2258     call SYMBOL(artReadBarrierMark) // artReadBarrierMark(obj)
   2259     .ifnc RAW_VAR(reg), rax
   2260       movq %rax, REG_VAR(reg)       // Return result into `reg`.
   2261     .endif

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore floating-point registers.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    movq 64(%rsp), %xmm8
    movq 72(%rsp), %xmm9
    movq 80(%rsp), %xmm10
    movq 88(%rsp), %xmm11
    // Remove floating-point registers.
    addq MACRO_LITERAL(12 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(12 * 8))
    // Restore core regs, except `reg`, as it is used to return the
    // result of this function (simply remove it from the stack instead).
    POP_REG_NE r11, RAW_VAR(reg)
    POP_REG_NE r10, RAW_VAR(reg)
    POP_REG_NE r9, RAW_VAR(reg)
    POP_REG_NE r8, RAW_VAR(reg)
    POP_REG_NE rdi, RAW_VAR(reg)
    POP_REG_NE rsi, RAW_VAR(reg)
    POP_REG_NE rdx, RAW_VAR(reg)
    POP_REG_NE rcx, RAW_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
.Lret_rb_\name:
    ret
.Lret_forwarding_address\name:
    // The overflow above cleared the state bits; shift left to recover the forwarding address.
    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
    movq %rax, REG_VAR(reg)
    POP_REG_NE rax, RAW_VAR(reg)
    ret
    END_FUNCTION VAR(name)
END_MACRO
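
// For orientation, each art_quick_read_barrier_mark_regNN below behaves
// roughly like the following C++ sketch. IsMarked, IsForwardingAddress and
// GetForwardingAddress are hypothetical helpers standing in for the lock
// word tests done inline above; only artReadBarrierMark is a real runtime
// entry point.
//
//   extern "C" mirror::Object* artReadBarrierMark(mirror::Object* obj);
//
//   mirror::Object* MarkReg(mirror::Object* ref) {
//     if (ref == nullptr || IsMarked(ref)) return ref;                 // Fast path.
//     if (IsForwardingAddress(ref)) return GetForwardingAddress(ref);  // Moved object.
//     return artReadBarrierMark(ref);                                  // Slow path.
//   }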

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, rax
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, rcx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, rdx
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, rbx
// Note: There is no art_quick_read_barrier_mark_reg04, as register 4 (RSP)
// cannot be used to pass arguments.
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, rbp
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, rsi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, rdi
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, r12
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, r13
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, r14
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, r15

DEFINE_FUNCTION art_quick_read_barrier_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp           // Alignment padding: realign rsp to 16 bytes for the call.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierSlow) // artReadBarrierSlow(ref, obj, offset)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_slow

DEFINE_FUNCTION art_quick_read_barrier_for_root_slow
    SETUP_FP_CALLEE_SAVE_FRAME
    subq LITERAL(8), %rsp                  // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    call SYMBOL(artReadBarrierForRootSlow) // artReadBarrierForRootSlow(root)
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_read_barrier_for_root_slow
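
// The two runtime calls above follow the native C ABI; assuming prototypes
// that match the argument comments (a sketch, not the authoritative
// declarations), they look like:
//
//   extern "C" mirror::Object* artReadBarrierSlow(mirror::Object* ref,
//                                                 mirror::Object* obj,
//                                                 uint32_t offset);
//   extern "C" mirror::Object* artReadBarrierForRootSlow(GcRoot<mirror::Object>* root);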

    /*
     * On stack replacement stub.
     * On entry:
     *   [sp] = return address
     *   rdi = stack to copy
     *   rsi = size of stack
     *   rdx = pc to call
     *   rcx = JValue* result
     *   r8 = shorty
     *   r9 = thread
     *
     * Note that the native C ABI has already aligned the stack to 16 bytes.
     */
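    /*
     * Viewed from C, the stub is entered roughly as below; this is an
     * illustrative prototype derived from the register assignments above,
     * not the authoritative declaration.
     *
     *   extern "C" void art_quick_osr_stub(void* stack_to_copy, size_t stack_size,
     *                                      const uint8_t* native_pc, JValue* result,
     *                                      const char* shorty, Thread* self);
     */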
DEFINE_FUNCTION art_quick_osr_stub
    // Save the non-volatiles.
    PUSH rbp                      // Save rbp.
    PUSH rcx                      // Save rcx/result*.
    PUSH r8                       // Save r8/shorty*.

    // Save callee saves.
    PUSH rbx
    PUSH r12
    PUSH r13
    PUSH r14
    PUSH r15

    pushq LITERAL(0)              // Push null for ArtMethod*.
    movl %esi, %ecx               // rcx := size of stack
    movq %rdi, %rsi               // rsi := stack to copy
    call .Losr_entry              // The call pushes the return address that the
                                  // copied frame's code will eventually `ret` to.

    // Restore stack and callee-saves.
    addq LITERAL(8), %rsp
    POP r15
    POP r14
    POP r13
    POP r12
    POP rbx
    POP r8
    POP rcx
    POP rbp
    cmpb LITERAL('D'), (%r8)       // Test if result type char == 'D'.
    je .Losr_return_double_quick
    cmpb LITERAL('F'), (%r8)       // Test if result type char == 'F'.
    je .Losr_return_float_quick
    movq %rax, (%rcx)              // Store the result, assuming it's a long, int or Object*.
    ret
.Losr_return_double_quick:
    movsd %xmm0, (%rcx)            // Store the double floating-point result.
    ret
.Losr_return_float_quick:
    movss %xmm0, (%rcx)            // Store the floating-point result.
    ret
.Losr_entry:
    subl LITERAL(8), %ecx         // The given stack size includes the pushed frame pointer; subtract it.
    subq %rcx, %rsp
    movq %rsp, %rdi               // rdi := beginning of stack
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    jmp *%rdx
END_FUNCTION art_quick_osr_stub

DEFINE_FUNCTION art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME                 // save callee saves
    movq %gs:THREAD_SELF_OFFSET, %rdx              // pass Thread
    movq %rsp, %rcx                                // pass SP
    subq LITERAL(16), %rsp                         // make space for JValue result
    CFI_ADJUST_CFA_OFFSET(16)
    movq LITERAL(0), (%rsp)                        // initialize result
    movq %rsp, %rdi                                // store pointer to JValue result
    call SYMBOL(artInvokePolymorphic)              // artInvokePolymorphic(result, receiver, Thread*, SP)
                                                   // rax := result type descriptor char
    subq LITERAL('A'), %rax                        // Convert type descriptor character value to a zero based index.
    cmpb LITERAL('Z' - 'A'), %al                   // Eliminate out-of-bounds values.
    ja .Lcleanup_and_return
    movzbq %al, %rax
    leaq .Lhandler_table(%rip), %rcx               // Get the address of the handler table.
    movslq (%rcx, %rax, 4), %rax                   // Look up the handler offset relative to the table.
    addq %rcx, %rax                                // Add table address to yield handler address.
    jmpq *%rax                                     // Jump to handler.
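
    // Worked example: if artInvokePolymorphic leaves 'D' (double) in rax, then
    // rax = 'D' - 'A' = 3, the movslq loads the fourth table entry, i.e. the
    // offset .Lstore_double_result - .Lhandler_table, and the addq turns it
    // into the absolute address of .Lstore_double_result, which jmpq takes.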

.align 4
.Lhandler_table:                                   // Table of type descriptor to handlers.
MACRO1(HANDLER_TABLE_OFFSET, handle_label)
    // NB: some tools require 32 bits for relocations. Shouldn't need adjusting.
    .long RAW_VAR(handle_label) - .Lhandler_table
END_MACRO
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // A
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)      // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)    // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)     // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // H
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // K
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // L (object - references are compressed and only 32-bits)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // R
    HANDLER_TABLE_OFFSET(.Lstore_long_result)      // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)     // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)   // Z (boolean)

.Lstore_boolean_result:
    movzbq (%rsp), %rax                            // Copy boolean result to the accumulator.
    jmp .Lcleanup_and_return
.Lstore_char_result:
    movzwq (%rsp), %rax                            // Copy char result to the accumulator.
    jmp .Lcleanup_and_return
.Lstore_float_result:
    movd (%rsp), %xmm0                             // Copy float result to the context restored by
    movd %xmm0, 32(%rsp)                           // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    jmp .Lcleanup_and_return
.Lstore_double_result:
    movsd (%rsp), %xmm0                            // Copy double result to the context restored by
    movsd %xmm0, 32(%rsp)                          // RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    jmp .Lcleanup_and_return
.Lstore_long_result:
    movq (%rsp), %rax                              // Copy long result to the accumulator.
    // Fall-through.
.Lcleanup_and_return:
    addq LITERAL(16), %rsp                         // Pop space for JValue result.
    CFI_ADJUST_CFA_OFFSET(-16)                     // Releasing stack space shrinks the CFA offset.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_invoke_polymorphic