Home | History | Annotate | Download | only in x86_64
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "asm_support_x86_64.S"
     18 
// Allocates 4 * 8 bytes on the stack and spills the ART FP callee-saved
// registers (xmm12-xmm15) there. Must be paired with
// RESTORE_FP_CALLEE_SAVE_FRAME, which reloads from the same offsets.
MACRO0(SETUP_FP_CALLEE_SAVE_FRAME)
    // Create space for ART FP callee-saved registers
    subq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8)
    movq %xmm12, 0(%rsp)
    movq %xmm13, 8(%rsp)
    movq %xmm14, 16(%rsp)
    movq %xmm15, 24(%rsp)
END_MACRO
     28 
// Reloads the ART FP callee-saved registers (xmm12-xmm15) spilled by
// SETUP_FP_CALLEE_SAVE_FRAME and releases the 4 * 8 bytes of stack.
MACRO0(RESTORE_FP_CALLEE_SAVE_FRAME)
    // Restore ART FP callee-saved registers
    movq 0(%rsp), %xmm12
    movq 8(%rsp), %xmm13
    movq 16(%rsp), %xmm14
    movq 24(%rsp), %xmm15
    addq MACRO_LITERAL(4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(- 4 * 8)
END_MACRO
     38 
// For x86_64, the CFA is rsp+8, the address above the pushed return address on the stack.
     40 
     41     /*
     42      * Macro that sets up the callee save frame to conform with
     43      * Runtime::CreateCalleeSaveMethod(kSaveAll)
     44      */
// Builds the kSaveAll callee-save frame: pushes the 6 callee-saved GPRs,
// spills xmm12-xmm15, stores the save-all ArtMethod* at the bottom of the
// frame, and records RSP as the thread's top quick frame.
// Clobbers r10. Frame layout must match FRAME_SIZE_SAVE_ALL_CALLEE_SAVE.
MACRO0(SETUP_SAVE_ALL_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    // Not supported on Apple targets; trap (int3) if ever reached.
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee save registers to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq MACRO_LITERAL(4 * 8 + 8), %rsp
    CFI_ADJUST_CFA_OFFSET(4 * 8 + 8)
    // Save FPRs (ArtMethod* slot is at 0(%rsp), FPRs start at 8).
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for save all callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
     83 
     84     /*
     85      * Macro that sets up the callee save frame to conform with
     86      * Runtime::CreateCalleeSaveMethod(kRefsOnly)
     87      */
// Builds the kRefsOnly callee-save frame: same spill set as kSaveAll
// (6 callee-saved GPRs + xmm12-xmm15 + ArtMethod* slot), but stores the
// refs-only ArtMethod*. Clobbers r10. Pair with
// RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME.
MACRO0(SETUP_REFS_ONLY_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    // Not supported on Apple targets; trap (int3) if ever reached.
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    // Create space for FPR args, plus space for ArtMethod*.
    subq LITERAL(8 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(8 + 4 * 8)
    // Save FPRs (ArtMethod* slot is at 0(%rsp), FPRs start at 8).
    movq %xmm12, 8(%rsp)
    movq %xmm13, 16(%rsp)
    movq %xmm14, 24(%rsp)
    movq %xmm15, 32(%rsp)
    // R10 := ArtMethod* for refs only callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 6 * 8 + 4 * 8 + 8 + 8)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
    126 
// Tears down the frame built by SETUP_REFS_ONLY_CALLEE_SAVE_FRAME:
// reloads xmm12-xmm15, drops the FPR/ArtMethod* area, then pops the
// callee-saved GPRs in reverse push order.
MACRO0(RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME)
    // Restore FPRs.
    movq 8(%rsp), %xmm12
    movq 16(%rsp), %xmm13
    movq 24(%rsp), %xmm14
    movq 32(%rsp), %xmm15
    addq LITERAL(8 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8 - 4*8)
    // TODO: optimize by not restoring callee-saves restored by the ABI
    POP rbx
    POP rbp
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
    142 
    143     /*
    144      * Macro that sets up the callee save frame to conform with
    145      * Runtime::CreateCalleeSaveMethod(kRefsAndArgs)
    146      */
// Builds the kRefsAndArgs callee-save frame: spills the callee-saved GPRs
// interleaved with the quick GPR argument registers (to match the core
// spills bitmap), the 8 XMM argument registers plus xmm12-xmm15, and the
// refs-and-args ArtMethod* at the bottom. Clobbers r10.
// Pair with RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME.
MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
#if defined(__APPLE__)
    // Not supported on Apple targets; trap (int3) if ever reached.
    int3
    int3
#else
    // R10 := Runtime::Current()
    movq _ZN3art7Runtime9instance_E@GOTPCREL(%rip), %r10
    movq (%r10), %r10
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // R10 := ArtMethod* for ref and args callee save frame method.
    THIS_LOAD_REQUIRES_READ_BARRIER
    movq RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET(%r10), %r10
    // Save FPRs: arg registers xmm0-xmm7 first, then callee-saves xmm12-xmm15.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* to bottom of stack.
    movq %r10, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET

    // Ugly compile-time check, but we only have the preprocessor.
    // Last +8: implicit return address pushed on stack when caller made call.
#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 11 * 8 + 4 * 8 + 80 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(X86_64) size not as expected."
#endif
#endif  // __APPLE__
END_MACRO
    198 
// Same frame layout as SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME, but stores
// the ArtMethod* already held in rdi instead of loading the runtime's
// refs-and-args method. Does not touch r10 or Runtime::Current().
MACRO0(SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    PUSH r15  // Callee save.
    PUSH r14  // Callee save.
    PUSH r13  // Callee save.
    PUSH r12  // Callee save.
    PUSH r9   // Quick arg 5.
    PUSH r8   // Quick arg 4.
    PUSH rsi  // Quick arg 1.
    PUSH rbp  // Callee save.
    PUSH rbx  // Callee save.
    PUSH rdx  // Quick arg 2.
    PUSH rcx  // Quick arg 3.
    // Create space for FPR args and create 2 slots for ArtMethod*.
    subq LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(80 + 4 * 8)
    // Save FPRs: arg registers xmm0-xmm7 first, then callee-saves xmm12-xmm15.
    movq %xmm0, 16(%rsp)
    movq %xmm1, 24(%rsp)
    movq %xmm2, 32(%rsp)
    movq %xmm3, 40(%rsp)
    movq %xmm4, 48(%rsp)
    movq %xmm5, 56(%rsp)
    movq %xmm6, 64(%rsp)
    movq %xmm7, 72(%rsp)
    movq %xmm12, 80(%rsp)
    movq %xmm13, 88(%rsp)
    movq %xmm14, 96(%rsp)
    movq %xmm15, 104(%rsp)
    // Store ArtMethod* (from rdi) to bottom of stack.
    movq %rdi, 0(%rsp)
    // Store rsp as the top quick frame.
    movq %rsp, %gs:THREAD_TOP_QUICK_FRAME_OFFSET
END_MACRO
    233 
// Tears down the frame built by SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME:
// reloads the argument XMMs and xmm12-xmm15, drops the FPR/ArtMethod*
// area, then pops the mixed callee-save/argument GPRs in reverse order.
MACRO0(RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME)
    // Restore FPRs.
    movq 16(%rsp), %xmm0
    movq 24(%rsp), %xmm1
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    addq MACRO_LITERAL(80 + 4 * 8), %rsp
    CFI_ADJUST_CFA_OFFSET(-(80 + 4 * 8))
    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx
    POP rdx
    POP rbx
    POP rbp
    POP rsi
    POP r8
    POP r9
    POP r12
    POP r13
    POP r14
    POP r15
END_MACRO
    263 
    264 
    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
// Delivers the thread's pending exception: builds a kSaveAll frame so the
// long-jump context can restore all registers, then calls
// artDeliverPendingExceptionFromCode(Thread*), which does not return.
MACRO0(DELIVER_PENDING_EXCEPTION)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME         // save callee saves for throw
    // (Thread*) setup
    movq %gs:THREAD_SELF_OFFSET, %rdi
    call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
    UNREACHABLE
END_MACRO
    276 
// Defines a no-argument throw entrypoint c_name that builds a kSaveAll
// frame and calls cxx_name(Thread*). cxx_name throws and never returns.
MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi  // pass Thread::Current()
    call VAR(cxx_name, 1)     // cxx_name(Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    286 
// Defines a one-argument throw entrypoint c_name: the argument arrives in
// rdi (untouched); Thread::Current() is placed in rsi before calling
// cxx_name(arg1, Thread*), which throws and never returns.
MACRO2(ONE_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi  // pass Thread::Current()
    call VAR(cxx_name, 1)     // cxx_name(arg1, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    296 
// Defines a two-argument throw entrypoint c_name: the arguments arrive in
// rdi/rsi (untouched); Thread::Current() is placed in rdx before calling
// cxx_name(arg1, arg2, Thread*), which throws and never returns.
MACRO2(TWO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call VAR(cxx_name, 1)     // cxx_name(arg1, arg2, Thread*)
    UNREACHABLE
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    306 
    307     /*
    308      * Called by managed code to create and deliver a NullPointerException.
    309      */
    310 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode
    311 
    312     /*
    313      * Called by managed code to create and deliver an ArithmeticException.
    314      */
    315 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode
    316 
    317     /*
    318      * Called by managed code to create and deliver a StackOverflowError.
    319      */
    320 NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode
    321 
    322     /*
    323      * Called by managed code, saves callee saves and then calls artThrowException
    324      * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
    325      */
    326 ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode
    327 
    328     /*
    329      * Called by managed code to create and deliver a NoSuchMethodError.
    330      */
    331 ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode
    332 
    333     /*
    334      * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
    335      * index, arg2 holds limit.
    336      */
    337 TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode
    338 
    339     /*
    340      * All generated callsites for interface invokes and invocation slow paths will load arguments
    341      * as usual - except instead of loading arg0/rdi with the target Method*, arg0/rdi will contain
    342      * the method_idx.  This wrapper will save arg1-arg3, load the caller's Method*, align the
    343      * stack and call the appropriate C helper.
    344      * NOTE: "this" is first visible argument of the target, and so can be found in arg1/rsi.
    345      *
    346      * The helper will attempt to locate the target and return a 128-bit result in rax/rdx consisting
    347      * of the target Method* in rax and method->code_ in rdx.
    348      *
    349      * If unsuccessful, the helper will return null/????. There will be a pending exception in the
    350      * thread and we branch to another stub to deliver it.
    351      *
    352      * On success this wrapper will restore arguments and *jump* to the target, leaving the return
    353      * location on the stack.
    354      *
    355      * Adapted from x86 code.
    356      */
// Defines an invocation trampoline c_name. rdi holds the method_idx (not a
// Method*); the helper cxx_name resolves the target, returning the target
// Method* in rax and its code pointer in rdx (null rax on failure, with a
// pending exception). On success we restore the managed arguments and tail
// call the resolved code with the Method* in rdi.
MACRO2(INVOKE_TRAMPOLINE, c_name, cxx_name)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE(%rsp), %rdx  // pass caller Method*
    movq %gs:THREAD_SELF_OFFSET, %rcx                      // pass Thread
    movq %rsp, %r8                                         // pass SP

    call VAR(cxx_name, 1)                   // cxx_name(arg1, arg2, caller method*, Thread*, SP)
                                                           // save the code pointer
    // rdi := resolved Method* (or null), rax := code pointer; the frame
    // restore below clobbers rdx, so stash the code pointer first.
    movq %rax, %rdi
    movq %rdx, %rax
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME

    testq %rdi, %rdi
    jz 1f

    // Tail call to intended method.
    jmp *%rax
1:
    DELIVER_PENDING_EXCEPTION
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    382 
    383 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline, artInvokeInterfaceTrampoline
    384 INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck
    385 
    386 INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
    387 INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
    388 INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
    389 INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck
    390 
    391 
    392     /*
    393      * Helper for quick invocation stub to set up XMM registers. Assumes r10 == shorty,
    394      * r11 == arg_array. Clobbers r10, r11 and al. Branches to xmm_setup_finished if it encounters
    395      * the end of the shorty.
    396      */
// Scans the shorty (r10) until the next FP type character and loads the
// corresponding value from arg_array (r11) into xmm_reg: 'D' (68) loads a
// double, 'F' (70) a float. Non-FP entries advance r11 by one 4-byte slot
// (two slots for 'J'/long). Branches to `finished` at end of shorty.
// Clobbers r10, r11 and al.
MACRO2(LOOP_OVER_SHORTY_LOADING_XMMS, xmm_reg, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto xmm_setup_finished
    je VAR(finished, 1)
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto FOUND_DOUBLE
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto FOUND_FLOAT
    je 3f
    addq MACRO_LITERAL(4), %r11   // arg_array++
    //  Handle extra space in arg array taken by a long.
    cmpb MACRO_LITERAL(74), %al   // if (al != 'J') goto LOOP
    jne 1b
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b                        // goto LOOP
2:  // FOUND_DOUBLE
    movsd (%r11), REG_VAR(xmm_reg, 0)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 4f
3:  // FOUND_FLOAT
    movss (%r11), REG_VAR(xmm_reg, 0)
    addq MACRO_LITERAL(4), %r11   // arg_array++
4:
END_MACRO
    422 
    423     /*
    424      * Helper for quick invocation stub to set up GPR registers. Assumes r10 == shorty,
    425      * r11 == arg_array. Clobbers r10, r11 and al. Branches to gpr_setup_finished if it encounters
    426      * the end of the shorty.
    427      */
// Scans the shorty (r10) until the next integral type character and loads
// the corresponding value from arg_array (r11) into gpr_reg64 ('J'/long,
// code 74) or gpr_reg32 (other 32-bit types). FP entries ('F' 70, 'D' 68)
// are skipped, advancing r11 by 4 or 8 bytes. Branches to `finished` at
// end of shorty. Clobbers r10, r11 and al.
MACRO3(LOOP_OVER_SHORTY_LOADING_GPRS, gpr_reg64, gpr_reg32, finished)
1: // LOOP
    movb (%r10), %al              // al := *shorty
    addq MACRO_LITERAL(1), %r10   // shorty++
    cmpb MACRO_LITERAL(0), %al    // if (al == '\0') goto gpr_setup_finished
    je  VAR(finished, 2)
    cmpb MACRO_LITERAL(74), %al   // if (al == 'J') goto FOUND_LONG
    je 2f
    cmpb MACRO_LITERAL(70), %al   // if (al == 'F') goto SKIP_FLOAT
    je 3f
    cmpb MACRO_LITERAL(68), %al   // if (al == 'D') goto SKIP_DOUBLE
    je 4f
    movl (%r11), REG_VAR(gpr_reg32, 1)
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 5f
2:  // FOUND_LONG
    movq (%r11), REG_VAR(gpr_reg64, 0)
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 5f
3:  // SKIP_FLOAT
    addq MACRO_LITERAL(4), %r11   // arg_array++
    jmp 1b
4:  // SKIP_DOUBLE
    addq MACRO_LITERAL(8), %r11   // arg_array+=2
    jmp 1b
5:
END_MACRO
    455 
    456     /*
    457      * Quick invocation stub.
    458      * On entry:
    459      *   [sp] = return address
    460      *   rdi = method pointer
    461      *   rsi = argument array that must at least contain the this pointer.
    462      *   rdx = size of argument array in bytes
    463      *   rcx = (managed) thread pointer
    464      *   r8 = JValue* result
    465      *   r9 = char* shorty
    466      */
// Quick invocation stub for instance methods (see header comment above for
// the register contract). Loads FP and GPR arguments from arg_array per
// the shorty, copies the full arg array into the callee's stack frame,
// calls the method's quick code, and stores the result into *r8 according
// to the shorty's return-type character.
DEFINE_FUNCTION art_quick_invoke_stub
#if defined(__APPLE__)
    // Not supported on Apple targets; trap (int3) if ever reached.
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character.
    leaq 4(%rsi), %r11            // R11 := arg_array + 4 ; ie skip this pointer.
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished
    .balign 16
.Lxmm_setup_finished:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save native callee save rbx
    PUSH r12                      // Save native callee save r12
    PUSH r13                      // Save native callee save r13
    PUSH r14                      // Save native callee save r14
    PUSH r15                      // Save native callee save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // r10d := size of arg array (preserved across frame setup).
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)       // Store null for method*

    movl %r10d, %ecx              // Place size of args in rcx.
    movq %rdi, %rax               // rax := method to be called
    movq %rsi, %r11               // r11 := arg_array
    leaq 8(%rsp), %rdi            // rdi is pointing just above the ArtMethod* in the stack
                                  // arguments.
    // Copy arg array into stack.
    rep movsb                     // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10             // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi               // rdi := method to be called
    movl (%r11), %esi             // rsi := this pointer
    addq LITERAL(4), %r11         // arg_array++
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished
.Lgpr_setup_finished:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp               // Restore stack pointer.
    POP r15                       // Pop r15
    POP r14                       // Pop r14
    POP r13                       // Pop r13
    POP r12                       // Pop r12
    POP rbx                       // Pop rbx
    POP r9                        // Pop r9 - shorty*
    POP r8                        // Pop r8 - result*.
    POP rbp                       // Pop rbp
    cmpb LITERAL(68), (%r9)       // Test if result type char == 'D'.
    je .Lreturn_double_quick
    cmpb LITERAL(70), (%r9)       // Test if result type char == 'F'.
    je .Lreturn_float_quick
    movq %rax, (%r8)              // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick:
    movsd %xmm0, (%r8)            // Store the double floating point result.
    ret
.Lreturn_float_quick:
    movss %xmm0, (%r8)            // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_stub
    549 
    550     /*
    551      * Quick invocation stub.
    552      * On entry:
    553      *   [sp] = return address
    554      *   rdi = method pointer
    555      *   rsi = argument array or null if no arguments.
    556      *   rdx = size of argument array in bytes
    557      *   rcx = (managed) thread pointer
    558      *   r8 = JValue* result
    559      *   r9 = char* shorty
    560      */
// Quick invocation stub for static methods (see header comment above for
// the register contract). Identical structure to art_quick_invoke_stub,
// except there is no implicit `this` pointer: the shorty/arg_array scans
// start at the first argument, and rsi is available as the first GPR arg.
DEFINE_FUNCTION art_quick_invoke_static_stub
#if defined(__APPLE__)
    // Not supported on Apple targets; trap (int3) if ever reached.
    int3
    int3
#else
    // Set up argument XMM registers.
    leaq 1(%r9), %r10             // R10 := shorty + 1  ; ie skip return arg character
    movq %rsi, %r11               // R11 := arg_array
    LOOP_OVER_SHORTY_LOADING_XMMS xmm0, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm1, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm2, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm3, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm4, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm5, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm6, .Lxmm_setup_finished2
    LOOP_OVER_SHORTY_LOADING_XMMS xmm7, .Lxmm_setup_finished2
    .balign 16
.Lxmm_setup_finished2:
    PUSH rbp                      // Save rbp.
    PUSH r8                       // Save r8/result*.
    PUSH r9                       // Save r9/shorty*.
    PUSH rbx                      // Save rbx
    PUSH r12                      // Save r12
    PUSH r13                      // Save r13
    PUSH r14                      // Save r14
    PUSH r15                      // Save r15
    movq %rsp, %rbp               // Copy value of stack pointer into base pointer.
    CFI_DEF_CFA_REGISTER(rbp)

    movl %edx, %r10d              // r10d := size of arg array (preserved across frame setup).
    addl LITERAL(100), %edx        // Reserve space for return addr, StackReference<method>, rbp,
                                   // r8, r9, rbx, r12, r13, r14, and r15 in frame.
    andl LITERAL(0xFFFFFFF0), %edx // Align frame size to 16 bytes.
    subl LITERAL(72), %edx         // Remove space for return address, rbp, r8, r9, rbx, r12,
                                   // r13, r14, and r15.
    subq %rdx, %rsp                // Reserve stack space for argument array.

#if (STACK_REFERENCE_SIZE != 4)
#error "STACK_REFERENCE_SIZE(X86_64) size not as expected."
#endif
    movq LITERAL(0), (%rsp)        // Store null for method*

    movl %r10d, %ecx               // Place size of args in rcx.
    movq %rdi, %rax                // rax := method to be called
    movq %rsi, %r11                // r11 := arg_array
    leaq 8(%rsp), %rdi             // rdi is pointing just above the ArtMethod* in the
                                   // stack arguments.
    // Copy arg array into stack.
    rep movsb                      // while (rcx--) { *rdi++ = *rsi++ }
    leaq 1(%r9), %r10              // r10 := shorty + 1  ; ie skip return arg character
    movq %rax, %rdi                // rdi := method to be called
    LOOP_OVER_SHORTY_LOADING_GPRS rsi, esi, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rdx, edx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS rcx, ecx, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r8, r8d, .Lgpr_setup_finished2
    LOOP_OVER_SHORTY_LOADING_GPRS r9, r9d, .Lgpr_setup_finished2
.Lgpr_setup_finished2:
    call *ART_METHOD_QUICK_CODE_OFFSET_64(%rdi) // Call the method.
    movq %rbp, %rsp                // Restore stack pointer.
    POP r15                        // Pop r15
    POP r14                        // Pop r14
    POP r13                        // Pop r13
    POP r12                        // Pop r12
    POP rbx                        // Pop rbx
    POP r9                         // Pop r9 - shorty*.
    POP r8                         // Pop r8 - result*.
    POP rbp                        // Pop rbp
    cmpb LITERAL(68), (%r9)        // Test if result type char == 'D'.
    je .Lreturn_double_quick2
    cmpb LITERAL(70), (%r9)        // Test if result type char == 'F'.
    je .Lreturn_float_quick2
    movq %rax, (%r8)               // Store the result assuming its a long, int or Object*
    ret
.Lreturn_double_quick2:
    movsd %xmm0, (%r8)             // Store the double floating point result.
    ret
.Lreturn_float_quick2:
    movss %xmm0, (%r8)             // Store the floating point result.
    ret
#endif  // __APPLE__
END_FUNCTION art_quick_invoke_static_stub
    642 
    643     /*
    644      * Long jump stub.
    645      * On entry:
    646      *   rdi = gprs
    647      *   rsi = fprs
    648      */
// Long jump: restores a full register context. rdi points at an array of
// saved GPR values (in pop order below, ending with rsp), rsi at 16 saved
// FPR values. Control transfers to the saved rip via the final ret, which
// pops it from the restored stack. Does not return to the caller.
DEFINE_FUNCTION art_quick_do_long_jump
#if defined(__APPLE__)
    // Not supported on Apple targets; trap (int3) if ever reached.
    int3
    int3
#else
    // Restore FPRs.
    movq 0(%rsi), %xmm0
    movq 8(%rsi), %xmm1
    movq 16(%rsi), %xmm2
    movq 24(%rsi), %xmm3
    movq 32(%rsi), %xmm4
    movq 40(%rsi), %xmm5
    movq 48(%rsi), %xmm6
    movq 56(%rsi), %xmm7
    movq 64(%rsi), %xmm8
    movq 72(%rsi), %xmm9
    movq 80(%rsi), %xmm10
    movq 88(%rsi), %xmm11
    movq 96(%rsi), %xmm12
    movq 104(%rsi), %xmm13
    movq 112(%rsi), %xmm14
    movq 120(%rsi), %xmm15
    // Restore GPRs.
    movq %rdi, %rsp   // RSP points to gprs.
    // Load all registers except RSP and RIP with values in gprs.
    popq %r15
    popq %r14
    popq %r13
    popq %r12
    popq %r11
    popq %r10
    popq %r9
    popq %r8
    popq %rdi
    popq %rsi
    popq %rbp
    addq LITERAL(8), %rsp   // Skip rsp
    popq %rbx
    popq %rdx
    popq %rcx
    popq %rax
    popq %rsp      // Load stack pointer.
    ret            // From higher in the stack pop rip.
#endif  // __APPLE__
END_FUNCTION art_quick_do_long_jump
    694 
// Defines a runtime downcall c_name taking no managed arguments: builds a
// kRefsOnly frame, calls cxx_name(Thread*), restores the frame, then runs
// return_macro to return the result or deliver a pending exception.
MACRO3(NO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdi    // pass Thread::Current()
    call VAR(cxx_name, 1)                // cxx_name(Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)          // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    705 
// Generate a stub named c_name that calls cxx_name(arg0, Thread*); arg0 is
// already in rdi per the managed calling convention, so only rsi is set up.
MACRO3(ONE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rsi    // pass Thread::Current()
    call VAR(cxx_name, 1)                // cxx_name(arg0, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)          // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    716 
// Generate a stub named c_name that calls cxx_name(arg0, arg1, Thread*);
// arg0/arg1 arrive in rdi/rsi, Thread* goes in rdx.
MACRO3(TWO_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call VAR(cxx_name, 1)                // cxx_name(arg0, arg1, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    CALL_MACRO(return_macro, 2)          // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    727 
// Generate a stub named c_name that calls cxx_name(arg0, arg1, arg2, Thread*);
// arg0/arg1/arg2 arrive in rdi/rsi/rdx, Thread* goes in rcx.
MACRO3(THREE_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)         // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    738 
// Generate a stub named c_name that calls cxx_name(arg0, arg1, arg2, arg3, Thread*);
// arg0..arg3 arrive in rdi/rsi/rdx/rcx, Thread* goes in r8.
MACRO3(FOUR_ARG_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME   // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, arg3, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)         // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    749 
// Like ONE_ARG_DOWNCALL but also passes the referrer (read from the caller's
// frame at 8(%rsp), i.e. before the callee-save frame is pushed) as the
// second argument: cxx_name(arg0, referrer, Thread*).
MACRO3(ONE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    movq 8(%rsp), %rsi                  // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rdx   // pass Thread::Current()
    call VAR(cxx_name, 1)               // cxx_name(arg0, referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    761 
// Like TWO_ARG_DOWNCALL but also passes the referrer (read from the caller's
// frame before the callee-save frame is pushed): cxx_name(arg0, arg1, referrer, Thread*).
MACRO3(TWO_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    movq 8(%rsp), %rdx                  // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                        // arg0 and arg1 are in rdi/rsi
    movq %gs:THREAD_SELF_OFFSET, %rcx   // pass Thread::Current()
    call VAR(cxx_name, 1)               // (arg0, arg1, referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    773 
// Like THREE_ARG_DOWNCALL but also passes the referrer (read from the caller's
// frame before the callee-save frame is pushed):
// cxx_name(arg0, arg1, arg2, referrer, Thread*).
MACRO3(THREE_ARG_REF_DOWNCALL, c_name, cxx_name, return_macro)
    DEFINE_FUNCTION VAR(c_name, 0)
    movq 8(%rsp), %rcx                  // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                        // arg0, arg1, and arg2 are in rdi/rsi/rdx
    movq %gs:THREAD_SELF_OFFSET, %r8    // pass Thread::Current()
    call VAR(cxx_name, 1)               // cxx_name(arg0, arg1, arg2, referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME // restore frame up to return address
    CALL_MACRO(return_macro, 2)         // return or deliver exception
    END_FUNCTION VAR(c_name, 0)
END_MACRO
    785 
// Return to the caller if RAX (the call result) is non-zero; a zero result
// means the callee stored a pending exception, which is delivered instead.
MACRO0(RETURN_IF_RESULT_IS_NON_ZERO)
    testq %rax, %rax               // rax == 0 ?
    jz  1f                         // if rax == 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
    793 
// Return to the caller if EAX is zero (success); a non-zero result means
// failure with a pending exception, which is delivered instead.
MACRO0(RETURN_IF_EAX_ZERO)
    testl %eax, %eax               // eax == 0 ?
    jnz  1f                        // if eax != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
    801 
// Return to the caller if the current thread has no pending exception;
// otherwise deliver it. Clobbers rcx.
MACRO0(RETURN_OR_DELIVER_PENDING_EXCEPTION)
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx // get exception field
    testq %rcx, %rcx               // rcx == 0 ?
    jnz 1f                         // if rcx != 0 goto 1
    ret                            // return
1:                                 // deliver exception on current thread
    DELIVER_PENDING_EXCEPTION
END_MACRO
    810 
    811 // Generate the allocation entrypoints for each allocator.
    812 // TODO: use arch/quick_alloc_entrypoints.S. Currently we don't as we need to use concatenation
    813 // macros to work around differences between OS/X's as and binutils as (OS/X lacks named arguments
    814 // to macros and the VAR macro won't concatenate arguments properly), this also breaks having
    815 // multi-line macros that use each other (hence using 1 macro per newline below).
// Object allocation: (type_idx, ArtMethod*) -> object; zero result => pending exception.
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object ## c_suffix, artAllocObjectFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_resolved ## c_suffix, artAllocObjectFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_initialized ## c_suffix, artAllocObjectFromCodeInitialized ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  TWO_ARG_DOWNCALL art_quick_alloc_object_with_access_check ## c_suffix, artAllocObjectFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
// Array allocation: three args (type_idx, count, ArtMethod*).
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array ## c_suffix, artAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array_resolved ## c_suffix, artAllocArrayFromCodeResolved ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_array_with_access_check ## c_suffix, artAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
// Filled-new-array style checked array allocation.
#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array ## c_suffix, artCheckAndAllocArrayFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_check_and_alloc_array_with_access_check ## c_suffix, artCheckAndAllocArrayFromCodeWithAccessCheck ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
// String allocation from bytes / chars / another string.
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(c_suffix, cxx_suffix) \
  FOUR_ARG_DOWNCALL art_quick_alloc_string_from_bytes ## c_suffix, artAllocStringFromBytesFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(c_suffix, cxx_suffix) \
  THREE_ARG_DOWNCALL art_quick_alloc_string_from_chars ## c_suffix, artAllocStringFromCharsFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
#define GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(c_suffix, cxx_suffix) \
  ONE_ARG_DOWNCALL art_quick_alloc_string_from_string ## c_suffix, artAllocStringFromStringFromCode ## cxx_suffix, RETURN_IF_RESULT_IS_NON_ZERO
    840 
// Allocation entrypoints for the DlMalloc allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc, DlMalloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc, DlMalloc)

// Instrumented variants of the DlMalloc entrypoints.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_dlmalloc_instrumented, DlMallocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_dlmalloc_instrumented, DlMallocInstrumented)

// Allocation entrypoints for the RosAlloc allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc, RosAlloc)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc, RosAlloc)

// Instrumented variants of the RosAlloc entrypoints.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_rosalloc_instrumented, RosAllocInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_rosalloc_instrumented, RosAllocInstrumented)

// Allocation entrypoints for the BumpPointer allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer, BumpPointer)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer, BumpPointer)

// Instrumented variants of the BumpPointer entrypoints.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_bump_pointer_instrumented, BumpPointerInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_bump_pointer_instrumented, BumpPointerInstrumented)
    918 
DEFINE_FUNCTION art_quick_alloc_object_tlab
    // Fast path tlab allocation.
    // RDI: uint32_t type_idx, RSI: ArtMethod*
    // RDX, RCX, R8, R9: free. RAX: return val.
    // NOTE(review): 32-bit (movl) loads of references below assume heap
    // references fit in 32 bits, consistent with the 32b optimization noted
    // for the aput_obj entrypoints later in this file.
    movl ART_METHOD_DEX_CACHE_TYPES_OFFSET(%rsi), %edx  // Load dex cache resolved types array
                                                               // Load the class
    movl MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdx, %rdi, MIRROR_OBJECT_ARRAY_COMPONENT_SIZE), %edx
    testl %edx, %edx                                           // Check null class
    jz   .Lart_quick_alloc_object_tlab_slow_path
                                                               // Check class status.
    cmpl LITERAL(MIRROR_CLASS_STATUS_INITIALIZED), MIRROR_CLASS_STATUS_OFFSET(%rdx)
    jne  .Lart_quick_alloc_object_tlab_slow_path
                                                               // Check access flags has kAccClassIsFinalizable
    testl LITERAL(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), MIRROR_CLASS_ACCESS_FLAGS_OFFSET(%rdx)
    jnz  .Lart_quick_alloc_object_tlab_slow_path
    movl MIRROR_CLASS_OBJECT_SIZE_OFFSET(%rdx), %ecx           // Load the object size.
    addl LITERAL(OBJECT_ALIGNMENT_MASK), %ecx                  // Align the size by 8. (addr + 7) & ~7.
    andl LITERAL(OBJECT_ALIGNMENT_MASK_TOGGLED), %ecx
    movq %gs:THREAD_SELF_OFFSET, %r8                           // r8 = thread
    movq THREAD_LOCAL_POS_OFFSET(%r8), %rax                    // Load thread_local_pos.
    addq %rax, %rcx                                            // Add the object size; rcx = candidate new pos.
    cmpq THREAD_LOCAL_END_OFFSET(%r8), %rcx                    // Check if it fits (unsigned compare).
    ja   .Lart_quick_alloc_object_tlab_slow_path
    movq %rcx, THREAD_LOCAL_POS_OFFSET(%r8)                    // Update thread_local_pos.
    addq LITERAL(1), THREAD_LOCAL_OBJECTS_OFFSET(%r8)          // Increment thread_local_objects.
                                                               // Store the class pointer in the header.
                                                               // No fence needed for x86.
    movl %edx, MIRROR_OBJECT_CLASS_OFFSET(%rax)
    ret                                                        // Fast path succeeded; rax = new object.
.Lart_quick_alloc_object_tlab_slow_path:
    // Slow path: rdi/rsi still hold type_idx/ArtMethod* for the runtime call.
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME    // save ref containing registers for GC
    // Outgoing argument set up
    movq %gs:THREAD_SELF_OFFSET, %rdx    // pass Thread::Current()
    call SYMBOL(artAllocObjectFromCodeTLAB)      // cxx_name(arg0, arg1, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    RETURN_IF_RESULT_IS_NON_ZERO         // return or deliver exception
END_FUNCTION art_quick_alloc_object_tlab
    956 
// TLAB entrypoints. art_quick_alloc_object_tlab is hand-written above with a
// fast path, so only the remaining TLAB entrypoints are generated here.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// Instrumented variants of the TLAB entrypoints.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab_instrumented, TLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab_instrumented, TLABInstrumented)

// Allocation entrypoints for the Region allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region, Region)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region, Region)

// Instrumented variants of the Region entrypoints.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_instrumented, RegionInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_instrumented, RegionInstrumented)

// Allocation entrypoints for the RegionTLAB allocator.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// Instrumented variants of the RegionTLAB entrypoints.
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_CHECK_AND_ALLOC_ARRAY_WITH_ACCESS_CHECK(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab_instrumented, RegionTLABInstrumented)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab_instrumented, RegionTLABInstrumented)
   1033 
// Resolution / initialization entrypoints; a null result means a pending exception.
ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO
ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO
ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO
ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO

// fill-array-data; zero EAX means success, non-zero means pending exception.
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_EAX_ZERO
   1040 
DEFINE_FUNCTION art_quick_lock_object
    // Thin-lock fast path for monitor-enter on the object in rdi/edi.
    // Falls back to artLockObjectFromCode for null, inflated (monitor) locks,
    // contention, or recursion-count overflow.
    // NOTE(review): 32-bit addressing via (%edi) below appears to rely on heap
    // references living in the low 4GB, matching the 32b optimization described
    // for the aput_obj entrypoints later in this file - confirm.
    testl %edi, %edi                      // Null check object/rdi.
    jz   .Lslow_lock
.Lretry_lock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word.
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx         // Test the 2 high bits.
    jne  .Lslow_lock                      // Slow path if either of the two high bits are set.
    movl %ecx, %edx                       // save lock word (edx) to keep read barrier bits.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
    test %ecx, %ecx
    jnz  .Lalready_thin                   // Lock word contains a thin lock.
    // unlocked case - edx: original lock word, edi: obj.
    movl %edx, %eax                       // eax: lock word zero except for read barrier bits.
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    or   %eax, %edx                       // edx: thread id with count of 0 + read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: expected old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lalready_thin:  // edx: lock word (with high 2 bits zero and original rb bits), edi: obj.
    movl %gs:THREAD_ID_OFFSET, %ecx       // ecx := thread id
    cmpw %cx, %dx                         // do we hold the lock already?
    jne  .Lslow_lock
    movl %edx, %ecx                       // copy the lock word to check count overflow.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %ecx  // zero the read barrier bits.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx  // increment recursion count
    test LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // overflowed if either of the upper two bits (28-29) are set
    jne  .Lslow_lock                      // count overflowed so go slow
    movl %edx, %eax                       // copy the lock word as the old val for cmpxchg.
    addl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx   // increment recursion count again for real.
    // update lockword, cmpxchg necessary for read barrier bits.
    lock cmpxchg  %edx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, edx: new val.
    jnz  .Lretry_lock                     // cmpxchg failed retry
    ret
.Lslow_lock:
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     // save ref containing registers for GC
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artLockObjectFromCode)    // artLockObjectFromCode(object, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_lock_object
   1081 
DEFINE_FUNCTION art_quick_unlock_object
    // Thin-lock fast path for monitor-exit on the object in rdi/edi.
    // Falls back to artUnlockObjectFromCode for null objects, inflated
    // (monitor) locks, or when this thread does not own the lock.
    testl %edi, %edi                      // null check object/edi
    jz   .Lslow_unlock
.Lretry_unlock:
    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi), %ecx  // ecx := lock word
    movl %gs:THREAD_ID_OFFSET, %edx       // edx := thread id
    test LITERAL(LOCK_WORD_STATE_MASK), %ecx
    jnz  .Lslow_unlock                    // lock word contains a monitor
    cmpw %cx, %dx                         // does the thread id match?
    jne  .Lslow_unlock
    movl %ecx, %edx                       // copy the lock word to detect new count of 0.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), %edx  // zero the read barrier bits.
    cmpl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %edx
    jae  .Lrecursive_thin_unlock          // count > 0: just decrement, keep the lock held.
    // Final unlock: new lock word is zero except for the original read barrier bits.
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    andl LITERAL(LOCK_WORD_READ_BARRIER_STATE_MASK), %ecx  // ecx: new lock word zero except original rb bits.
#ifndef USE_READ_BARRIER
    movl %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lrecursive_thin_unlock:  // ecx: original lock word, edi: obj
    // update lockword, cmpxchg necessary for read barrier bits.
    movl %ecx, %eax                       // eax: old lock word.
    subl LITERAL(LOCK_WORD_THIN_LOCK_COUNT_ONE), %ecx
#ifndef USE_READ_BARRIER
    mov  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)
#else
    lock cmpxchg  %ecx, MIRROR_OBJECT_LOCK_WORD_OFFSET(%edi)  // eax: old val, ecx: new val.
    jnz  .Lretry_unlock                   // cmpxchg failed retry
#endif
    ret
.Lslow_unlock:
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME     // save ref containing registers for GC
    movq %gs:THREAD_SELF_OFFSET, %rsi     // pass Thread::Current()
    call SYMBOL(artUnlockObjectFromCode)  // artUnlockObjectFromCode(object, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME   // restore frame up to return address
    RETURN_IF_EAX_ZERO
END_FUNCTION art_quick_unlock_object
   1124 
DEFINE_FUNCTION art_quick_check_cast
    // Check that the class in rsi is assignable to the class in rdi
    // (per the artIsAssignableFromCode call's argument comment below);
    // if not, throw ClassCastException via the long-jump context.
    PUSH rdi                          // Save args for exc
    PUSH rsi
    SETUP_FP_CALLEE_SAVE_FRAME        // The callee may clobber ART's FP callee-saves (xmm12-15).
    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
    testq %rax, %rax
    jz 1f                             // jump forward if not assignable
    RESTORE_FP_CALLEE_SAVE_FRAME
    addq LITERAL(16), %rsp            // pop arguments
    CFI_ADJUST_CFA_OFFSET(-16)

    ret                               // Assignable: return normally.
1:
    // Not assignable: reload the two classes and throw.
    RESTORE_FP_CALLEE_SAVE_FRAME
    POP rsi                           // Pop arguments
    POP rdi
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // save all registers as basis for long jump context
    mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
    int3                              // unreached
END_FUNCTION art_quick_check_cast
   1146 
   1147 
   1148     /*
   1149      * Entry from managed code for array put operations of objects where the value being stored
   1150      * needs to be checked for compatibility.
   1151      *
   1152      * Currently all the parameters should fit into the 32b portions of the registers. Index always
   1153      * will. So we optimize for a tighter encoding. The 64b versions are in comments.
   1154      *
   1155      * rdi(edi) = array, rsi(esi) = index, rdx(edx) = value
   1156      */
DEFINE_FUNCTION art_quick_aput_obj_with_null_and_bound_check
#if defined(__APPLE__)
    int3
    int3
#else
    // Null-check the array reference, then fall through to the bounds check.
    // 32-bit test is sufficient: heap references are 32-bit here (see the
    // "top 32b are zero" notes in art_quick_aput_obj below).
    testl %edi, %edi
//  testq %rdi, %rdi
    jnz art_quick_aput_obj_with_bound_check
    jmp art_quick_throw_null_pointer_exception  // Tail call; args already in place.
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_null_and_bound_check
   1168 
   1169 
    /*
     * Bounds-checked object array store: rdi(edi) = array, rsi(esi) = index,
     * rdx(edx) = value. On an in-range index, tail-calls art_quick_aput_obj;
     * otherwise rearranges (index, length) into arg registers and tail-calls
     * the array-bounds throw stub.
     */
DEFINE_FUNCTION art_quick_aput_obj_with_bound_check
#if defined(__APPLE__)
    int3
    int3
#else
    movl MIRROR_ARRAY_LENGTH_OFFSET(%edi), %ecx
//  movl MIRROR_ARRAY_LENGTH_OFFSET(%rdi), %ecx  // This zero-extends, so value(%rcx)=value(%ecx)
    // Unsigned compare: a negative index becomes a huge unsigned value, so one
    // `jb` covers both index < 0 and index >= length.
    cmpl %ecx, %esi
    jb art_quick_aput_obj
    mov %esi, %edi                  // arg1 = offending index
//  mov %rsi, %rdi
    mov %ecx, %esi                  // arg2 = array length
//  mov %rcx, %rsi
    jmp art_quick_throw_array_bounds
#endif  // __APPLE__
END_FUNCTION art_quick_aput_obj_with_bound_check
   1186 
   1187 
    /*
     * Object array store with type check: rdi(edi) = array, rsi(esi) = index
     * (already bounds-checked), rdx(edx) = value.
     * Fast path: null stores and stores where the value's class equals the
     * array's component type go straight to the write (plus card mark for
     * non-null). Otherwise artIsAssignableFromCode decides; failure raises
     * ArrayStoreException via a long jump.
     */
DEFINE_FUNCTION art_quick_aput_obj
    testl %edx, %edx                // store of null
//  test %rdx, %rdx
    jz .Ldo_aput_null
    movl MIRROR_OBJECT_CLASS_OFFSET(%edi), %ecx
//  movq MIRROR_OBJECT_CLASS_OFFSET(%rdi), %rcx
    movl MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%ecx), %ecx
//  movq MIRROR_CLASS_COMPONENT_TYPE_OFFSET(%rcx), %rcx
    cmpl MIRROR_OBJECT_CLASS_OFFSET(%edx), %ecx // value's type == array's component type - trivial assignability
//  cmpq MIRROR_OBJECT_CLASS_OFFSET(%rdx), %rcx
    jne .Lcheck_assignability
.Ldo_aput:
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    // Write barrier: mark the card for the array. Card index = address >> 7
    // (128-byte cards); the card table base doubles as the dirty value (%dl).
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi
//  shrl LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
    ret
.Ldo_aput_null:
    // Null stores need no type check and no card mark.
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    ret
.Lcheck_assignability:
    // Save arguments.
    PUSH rdi
    PUSH rsi
    PUSH rdx
    subq LITERAL(8), %rsp        // Alignment padding.
    CFI_ADJUST_CFA_OFFSET(8)
    SETUP_FP_CALLEE_SAVE_FRAME

                                  // "Uncompress" = do nothing, as already zero-extended on load.
    movl MIRROR_OBJECT_CLASS_OFFSET(%edx), %esi // Pass arg2 = value's class.
    movq %rcx, %rdi               // Pass arg1 = array's component type.

    call SYMBOL(artIsAssignableFromCode)  // (Class* a, Class* b)

    // Exception?
    testq %rax, %rax
    jz   .Lthrow_array_store_exception

    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP  rdx
    POP  rsi
    POP  rdi

    // Assignable: perform the store plus card mark, same as the fast path.
    movl %edx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%edi, %esi, 4)
//  movq %rdx, MIRROR_OBJECT_ARRAY_DATA_OFFSET(%rdi, %rsi, 4)
    movq %gs:THREAD_CARD_TABLE_OFFSET, %rdx
    shrl LITERAL(7), %edi
//  shrl LITERAL(7), %rdi
    movb %dl, (%rdx, %rdi)                       // Note: this assumes that top 32b of %rdi are zero
//  movb %dl, (%rdx, %rdi)
    ret
    CFI_ADJUST_CFA_OFFSET(32 + 4 * 8)  // Reset unwind info so following code unwinds.
.Lthrow_array_store_exception:
    RESTORE_FP_CALLEE_SAVE_FRAME
    // Restore arguments.
    addq LITERAL(8), %rsp
    CFI_ADJUST_CFA_OFFSET(-8)
    POP  rdx
    POP  rsi
    POP  rdi

    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME  // Save all registers as basis for long jump context.

    // Outgoing argument set up.
    movq %rdx, %rsi                         // Pass arg 2 = value.
    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass arg 3 = Thread::Current().
                                            // Pass arg 1 = array.
    call SYMBOL(artThrowArrayStoreException) // (array, value, Thread*)
    int3                          // unreached
END_FUNCTION art_quick_aput_obj
   1265 
   1266 // TODO: This is quite silly on X86_64 now.
// Thin wrapper over libc memcpy; args (dst, src, size) are already in the
// C calling-convention registers, so no shuffling is needed.
DEFINE_FUNCTION art_quick_memcpy
    call PLT_SYMBOL(memcpy)       // (void*, const void*, size_t)
    ret
END_FUNCTION art_quick_memcpy
   1271 
   1272 NO_ARG_DOWNCALL art_quick_test_suspend, artTestSuspendFromCode, ret
   1273 
// 64-bit long arithmetic helpers. Presumably unnecessary on x86-64 where
// 64-bit register ops are native — these stubs should never be reached;
// TODO confirm UNIMPLEMENTED expands to a trap (see asm_support_x86_64.S).
UNIMPLEMENTED art_quick_ldiv
UNIMPLEMENTED art_quick_lmod
UNIMPLEMENTED art_quick_lmul
UNIMPLEMENTED art_quick_lshl
UNIMPLEMENTED art_quick_lshr
UNIMPLEMENTED art_quick_lushr
   1280 
// Instance field setters. RETURN_IF_EAX_ZERO: zero result = success,
// non-zero = deliver pending exception.
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCode, RETURN_IF_EAX_ZERO
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_EAX_ZERO

// Instance field getters: result returned in rax/eax, or the pending
// exception is delivered.
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION

// Static field setters. Note: set64_static is defined separately below
// because its argument order differs.
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_EAX_ZERO
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_EAX_ZERO

// Static field getters.
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION
   1307 
   1308 // This is singled out as the argument order is different.
// This is singled out as the argument order is different.
// In: rdi = field_idx, rsi = new_val, referrer Method* at 8(%rsp).
// The referrer must be loaded BEFORE the callee-save frame is set up, while
// 8(%rsp) still addresses the caller's stack slot.
DEFINE_FUNCTION art_quick_set64_static
    movq %rsi, %rdx                      // pass new_val
    movq 8(%rsp), %rsi                   // pass referrer
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
                                         // field_idx is in rdi
    movq %gs:THREAD_SELF_OFFSET, %rcx    // pass Thread::Current()
    call SYMBOL(artSet64StaticFromCode)  // (field_idx, referrer, new_val, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  // restore frame up to return address
    RETURN_IF_EAX_ZERO                   // return or deliver exception
END_FUNCTION art_quick_set64_static
   1319 
   1320 
    /*
     * Entry for proxy method invocations: rdi = proxy method, rsi = receiver.
     * Builds the refs-and-args frame, then hands off to the C++ handler with
     * Thread* and the frame SP so it can reflectively dispatch.
     */
DEFINE_FUNCTION art_quick_proxy_invoke_handler
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI

    movq %gs:THREAD_SELF_OFFSET, %rdx       // Pass Thread::Current().
    movq %rsp, %rcx                         // Pass SP.
    call SYMBOL(artQuickProxyInvokeHandler) // (proxy method, receiver, Thread*, SP)
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    movq %rax, %xmm0                        // Copy return value in case of float returns.
    RETURN_OR_DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_proxy_invoke_handler
   1331 
   1332     /*
   1333      * Called to resolve an imt conflict.
   1334      * rax is a hidden argument that holds the target method's dex method index.
   1335      */
DEFINE_FUNCTION art_quick_imt_conflict_trampoline
#if defined(__APPLE__)
    int3
    int3
#else
    // Resolve the real target via the caller's dex cache, indexed by the
    // hidden dex method index in rax, then tail-call the interface trampoline
    // with the target method in rdi.
    movq 8(%rsp), %rdi            // load caller Method*
    movl ART_METHOD_DEX_CACHE_METHODS_OFFSET(%rdi), %edi     // load dex_cache_resolved_methods
    movq MIRROR_LONG_ARRAY_DATA_OFFSET(%rdi, %rax, 8), %rdi  // load the target method
    jmp art_quick_invoke_interface_trampoline
#endif  // __APPLE__
END_FUNCTION art_quick_imt_conflict_trampoline
   1347 
    /*
     * Lazy-resolution trampoline: resolves the called method, then tail-calls
     * the returned code pointer with the resolved Method* in rdi. A null code
     * pointer means resolution threw; deliver the pending exception instead.
     */
DEFINE_FUNCTION art_quick_resolution_trampoline
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    movq %gs:THREAD_SELF_OFFSET, %rdx
    movq %rsp, %rcx
    call SYMBOL(artQuickResolutionTrampoline) // (called, receiver, Thread*, SP)
    movq %rax, %r10               // Remember returned code pointer in R10.
    movq (%rsp), %rdi             // Load called method into RDI.
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    testq %r10, %r10              // If code pointer is null goto deliver pending exception.
    jz 1f
    jmp *%r10                     // Tail call into method.
1:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_resolution_trampoline
   1362 
   1363 /* Generic JNI frame layout:
   1364  *
   1365  * #-------------------#
   1366  * |                   |
   1367  * | caller method...  |
   1368  * #-------------------#    <--- SP on entry
   1369  *
   1370  *          |
   1371  *          V
   1372  *
   1373  * #-------------------#
   1374  * | caller method...  |
   1375  * #-------------------#
   1376  * | Return            |
   1377  * | R15               |    callee save
   1378  * | R14               |    callee save
   1379  * | R13               |    callee save
   1380  * | R12               |    callee save
   1381  * | R9                |    arg5
   1382  * | R8                |    arg4
   1383  * | RSI/R6            |    arg1
   1384  * | RBP/R5            |    callee save
   1385  * | RBX/R3            |    callee save
   1386  * | RDX/R2            |    arg2
   1387  * | RCX/R1            |    arg3
   1388  * | XMM7              |    float arg 8
   1389  * | XMM6              |    float arg 7
   1390  * | XMM5              |    float arg 6
   1391  * | XMM4              |    float arg 5
   1392  * | XMM3              |    float arg 4
   1393  * | XMM2              |    float arg 3
   1394  * | XMM1              |    float arg 2
   1395  * | XMM0              |    float arg 1
   1396  * | RDI/Method*       |  <- sp
   1397  * #-------------------#
   1398  * | Scratch Alloca    |    5K scratch space
   1399  * #---------#---------#
   1400  * |         | sp*     |
   1401  * | Tramp.  #---------#
   1402  * | args    | thread  |
   1403  * | Tramp.  #---------#
   1404  * |         | method  |
   1405  * #-------------------#    <--- SP on artQuickGenericJniTrampoline
   1406  *
   1407  *           |
   1408  *           v              artQuickGenericJniTrampoline
   1409  *
   1410  * #-------------------#
   1411  * | caller method...  |
   1412  * #-------------------#
   1413  * | Return            |
   1414  * | Callee-Save Data  |
   1415  * #-------------------#
   1416  * | handle scope      |
   1417  * #-------------------#
   1418  * | Method*           |    <--- (1)
   1419  * #-------------------#
   1420  * | local ref cookie  | // 4B
   1421  * | handle scope size | // 4B   TODO: roll into call stack alignment?
   1422  * #-------------------#
   1423  * | JNI Call Stack    |
   1424  * #-------------------#    <--- SP on native call
   1425  * |                   |
   1426  * | Stack for Regs    |    The trampoline assembly will pop these values
   1427  * |                   |    into registers for native call
   1428  * #-------------------#
   1429  * | Native code ptr   |
   1430  * #-------------------#
   1431  * | Free scratch      |
   1432  * #-------------------#
   1433  * | Ptr to (1)        |    <--- RSP
   1434  * #-------------------#
   1435  */
   1436     /*
   1437      * Called to do a generic JNI down-call
   1438      */
DEFINE_FUNCTION art_quick_generic_jni_trampoline
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_RDI

    movq %rsp, %rbp                 // save SP at (old) callee-save frame
    CFI_DEF_CFA_REGISTER(rbp)       // rbp anchors unwinding across the alloca below

    //
    // reserve a lot of space
    //
    //      4    local state ref
    //      4    padding
    //   4196    4k scratch space, enough for 2x 256 8-byte parameters (TODO: handle scope overhead?)
    //     16    handle scope member fields ?
    // +  112    14x 8-byte stack-2-register space
    // ------
    //   4332
    // 16-byte aligned: 4336
    // Note: 14x8 = 7*16, so the stack stays aligned for the native call...
    //       Also means: the padding is somewhere in the middle
    //
    //
    // New test: use 5K and release
    // 5k = 5120
    subq LITERAL(5120), %rsp
    // prepare for artQuickGenericJniTrampoline call
    // (Thread*,  SP)
    //    rdi    rsi      <= C calling convention
    //  gs:...   rbp      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rbp, %rsi
    call SYMBOL(artQuickGenericJniTrampoline)  // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // %rax: pointer to native code, 0 on error.
    // %rdx: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    test %rax, %rax
    jz .Lexception_in_native

    // Release part of the alloca.
    movq %rdx, %rsp

    // Pop the native call's GPR arguments, laid out by the C trampoline in
    // C-calling-convention order at the bottom of the used alloca area.
    popq %rdi
    popq %rsi
    popq %rdx
    popq %rcx
    popq %r8
    popq %r9
    // TODO: skip floating point if unused, some flag.
    movq 0(%rsp), %xmm0
    movq 8(%rsp), %xmm1
    movq 16(%rsp), %xmm2
    movq 24(%rsp), %xmm3
    movq 32(%rsp), %xmm4
    movq 40(%rsp), %xmm5
    movq 48(%rsp), %xmm6
    movq 56(%rsp), %xmm7
    addq LITERAL(64), %rsp          // floating-point done

    // native call; rax = native code pointer returned above
    call *%rax

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*,  result, result_f)
    //   rdi      rsi   rdx       <= C calling convention
    //  gs:...    rax   xmm0      <= where they are
    movq %gs:THREAD_SELF_OFFSET, %rdi
    movq %rax, %rsi
    movq %xmm0, %rdx
    call SYMBOL(artQuickGenericJniEndTrampoline)

    // Pending exceptions possible.
    // TODO: use cmpq, needs direct encoding because of gas bug
    movq %gs:THREAD_EXCEPTION_OFFSET, %rcx
    test %rcx, %rcx
    jnz .Lexception_in_native

    // Tear down the alloca.
    movq %rbp, %rsp
    CFI_DEF_CFA_REGISTER(rsp)

    // Tear down the callee-save frame.
    // Load FPRs.
    // movq %xmm0, 16(%rsp)         // doesn't make sense!!!
    movq 24(%rsp), %xmm1            // neither does this!!!
    movq 32(%rsp), %xmm2
    movq 40(%rsp), %xmm3
    movq 48(%rsp), %xmm4
    movq 56(%rsp), %xmm5
    movq 64(%rsp), %xmm6
    movq 72(%rsp), %xmm7
    movq 80(%rsp), %xmm12
    movq 88(%rsp), %xmm13
    movq 96(%rsp), %xmm14
    movq 104(%rsp), %xmm15
    // was 80 bytes
    addq LITERAL(80 + 4*8), %rsp
    CFI_ADJUST_CFA_OFFSET(-80 - 4*8)
    // Save callee and GPR args, mixed together to agree with core spills bitmap.
    POP rcx  // Arg.
    POP rdx  // Arg.
    POP rbx  // Callee save.
    POP rbp  // Callee save.
    POP rsi  // Arg.
    POP r8   // Arg.
    POP r9   // Arg.
    POP r12  // Callee save.
    POP r13  // Callee save.
    POP r14  // Callee save.
    POP r15  // Callee save.
    // store into fpr, for when it's a fpr return...
    movq %rax, %xmm0
    ret
.Lexception_in_native:
    // Discard everything down to the managed frame recorded in the thread,
    // then deliver the pending exception.
    movq %gs:THREAD_TOP_QUICK_FRAME_OFFSET, %rsp
    CFI_DEF_CFA_REGISTER(rsp)
    // Do a call to push a new save-all frame required by the runtime.
    call .Lexception_call
.Lexception_call:
    DELIVER_PENDING_EXCEPTION
END_FUNCTION art_quick_generic_jni_trampoline
   1565 
   1566     /*
   1567      * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
   1568      * of a quick call:
   1569      * RDI = method being called / to bridge to.
   1570      * RSI, RDX, RCX, R8, R9 are arguments to that method.
   1571      */
DEFINE_FUNCTION art_quick_to_interpreter_bridge
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME   // Set up frame and save arguments.
    movq %gs:THREAD_SELF_OFFSET, %rsi      // RSI := Thread::Current()
    movq %rsp, %rdx                        // RDX := sp
    call SYMBOL(artQuickToInterpreterBridge)  // (method, Thread*, SP)
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME  // TODO: no need to restore arguments in this case.
    movq %rax, %xmm0                   // Place return value also into floating point return value.
    RETURN_OR_DELIVER_PENDING_EXCEPTION    // return or deliver exception
END_FUNCTION art_quick_to_interpreter_bridge
   1581 
   1582     /*
   1583      * Routine that intercepts method calls and returns.
   1584      */
DEFINE_FUNCTION art_quick_instrumentation_entry
#if defined(__APPLE__)
    int3
    int3
#else
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME

    movq %rdi, %r12               // Preserve method pointer in a callee-save.

    movq %gs:THREAD_SELF_OFFSET, %rdx   // Pass thread.
    movq FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp), %rcx   // Pass return PC.

    call SYMBOL(artInstrumentationMethodEntryFromCode) // (Method*, Object*, Thread*, LR)

                                  // %rax = result of call.
    movq %r12, %rdi               // Reload method pointer.

    // Overwrite the saved return PC so the method returns through
    // art_quick_instrumentation_exit instead of its real caller.
    leaq art_quick_instrumentation_exit(%rip), %r12   // Set up return through instrumentation
    movq %r12, FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-8(%rsp) // exit.

    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME

    jmp *%rax                     // Tail call to intended method.
#endif  // __APPLE__
END_FUNCTION art_quick_instrumentation_entry
   1610 
   1611 DEFINE_FUNCTION art_quick_instrumentation_exit
   1612     pushq LITERAL(0)          // Push a fake return PC as there will be none on the stack.
   1613 
   1614     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME
   1615 
   1616     // We need to save rax and xmm0. We could use a callee-save from SETUP_REF_ONLY, but then
   1617     // we would need to fully restore it. As there are a good number of callee-save registers, it
   1618     // seems easier to have an extra small stack area. But this should be revisited.
   1619 
   1620     movq  %rsp, %rsi                          // Pass SP.
   1621 
   1622     PUSH rax                  // Save integer result.
   1623     subq LITERAL(8), %rsp     // Save floating-point result.
   1624     CFI_ADJUST_CFA_OFFSET(8)
   1625     movq %xmm0, (%rsp)
   1626 
   1627     movq  %gs:THREAD_SELF_OFFSET, %rdi        // Pass Thread.
   1628     movq  %rax, %rdx                          // Pass integer result.
   1629     movq  %xmm0, %rcx                         // Pass floating-point result.
   1630 
   1631     call SYMBOL(artInstrumentationMethodExitFromCode)   // (Thread*, SP, gpr_res, fpr_res)
   1632 
   1633     movq  %rax, %rdi          // Store return PC
   1634     movq  %rdx, %rsi          // Store second return PC in hidden arg.
   1635 
   1636     movq (%rsp), %xmm0        // Restore floating-point result.
   1637     addq LITERAL(8), %rsp
   1638     CFI_ADJUST_CFA_OFFSET(-8)
   1639     POP rax                   // Restore integer result.
   1640 
   1641     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
   1642 
   1643     addq LITERAL(8), %rsp     // Drop fake return pc.
   1644 
   1645     jmp   *%rdi               // Return.
   1646 END_FUNCTION art_quick_instrumentation_exit
   1647 
   1648     /*
   1649      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
   1650      * will long jump to the upcall with a special exception of -1.
   1651      */
DEFINE_FUNCTION art_quick_deoptimize
    // Jump-target entry: push the hidden-arg register to fake a call's
    // return-address slot before falling into the shared path below.
    pushq %rsi                     // Entry point for a jump. Fake that we were called.
                                   // Use hidden arg.
.globl SYMBOL(art_quick_deoptimize_from_compiled_slow_path)  // Entry point for real calls
                                                             // from compiled slow paths.
SYMBOL(art_quick_deoptimize_from_compiled_slow_path):
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME
                                   // Stack should be aligned now.
    movq %gs:THREAD_SELF_OFFSET, %rdi         // Pass Thread.
    call SYMBOL(artDeoptimize) // artDeoptimize(Thread*)
    int3                           // Unreachable.
END_FUNCTION art_quick_deoptimize
   1664 
   1665     /*
   1666      * String's compareTo.
   1667      *
   1668      * On entry:
   1669      *    rdi:   this string object (known non-null)
   1670      *    rsi:   comp string object (known non-null)
   1671      */
DEFINE_FUNCTION art_quick_string_compareto
    // NOTE(review): 32-bit address registers (%edi/%esi) assume heap
    // references have zero upper 32 bits, consistent with the other
    // entrypoints in this file.
    movl MIRROR_STRING_COUNT_OFFSET(%edi), %r8d
    movl MIRROR_STRING_COUNT_OFFSET(%esi), %r9d
    /* Build pointers to the start of string data */
    leal MIRROR_STRING_VALUE_OFFSET(%edi), %edi
    leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
    /* Calculate min length and count diff */
    movl  %r8d, %ecx
    movl  %r8d, %eax
    subl  %r9d, %eax              // eax = this.count - comp.count
    cmovg %r9d, %ecx              // ecx = min(this.count, comp.count)
    /*
     * At this point we have:
     *   eax: value to return if first part of strings are equal
     *   ecx: minimum among the lengths of the two strings
     *   esi: pointer to comp string data
     *   edi: pointer to this string data
     */
    jecxz .Lkeep_length           // min length 0: result is the length diff
    repe cmpsw                    // find nonmatching chars in [%esi] and [%edi], up to length %ecx
    jne .Lnot_equal
.Lkeep_length:
    // Common prefix matched: shorter string compares lower (eax = length diff).
    ret
    .balign 16
.Lnot_equal:
    // cmpsw advanced both pointers past the mismatching char; back up 2 bytes.
    movzwl  -2(%edi), %eax        // get last compared char from this string
    movzwl  -2(%esi), %ecx        // get last compared char from comp string
    subl  %ecx, %eax              // return the difference
    ret
END_FUNCTION art_quick_string_compareto
   1702 
   1703 UNIMPLEMENTED art_quick_memcmp16
   1704 
// Assignability query from compiled code: args are already in rdi/rsi per the
// C convention; only the ART FP callee-saves need protecting across the call.
DEFINE_FUNCTION art_quick_assignable_from_code
    SETUP_FP_CALLEE_SAVE_FRAME
    call SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
    RESTORE_FP_CALLEE_SAVE_FRAME
    ret
END_FUNCTION art_quick_assignable_from_code
   1711 
   1712 
   1713 // Return from a nested signal:
   1714 // Entry:
   1715 //  rdi: address of jmp_buf in TLS
   1716 
DEFINE_FUNCTION art_nested_signal_return
                                    // first arg to longjmp is already in correct register
    movq LITERAL(1), %rsi           // second arg to longjmp (1)
    call PLT_SYMBOL(longjmp)        // longjmp(jmp_buf, 1) — does not return
    int3                            // won't get here
END_FUNCTION art_nested_signal_return
   1723