/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"

#include "arch/quick_alloc_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingException

    /*
     * Macro to spill the GPRs.
     */
.macro SPILL_ALL_CALLEE_SAVE_GPRS
    push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
    .cfi_adjust_cfa_offset 36
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset r6, 8
    .cfi_rel_offset r7, 12
    .cfi_rel_offset r8, 16
    .cfi_rel_offset r9, 20
    .cfi_rel_offset r10, 24
    .cfi_rel_offset r11, 28
    .cfi_rel_offset lr, 32
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAll).
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVE_FRAME rTemp1, rTemp2
    SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
    vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
    .cfi_adjust_cfa_offset 64
    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
    .cfi_adjust_cfa_offset 12
    RUNTIME_CURRENT1 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
    ldr \rTemp1, [\rTemp1, #RUNTIME_SAVE_ALL_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kSaveAll Method*.
    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVE != 36 + 64 + 12)
#error "SAVE_ALL_CALLEE_SAVE_FRAME(ARM) size not as expected."
#endif
.endm
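
    /*
     * Frame layout sketch for the macro above (offsets follow from the pushes;
     * the total matches the FRAME_SIZE_SAVE_ALL_CALLEE_SAVE check):
     *   [sp, #108]       lr
     *   [sp, #76..#104]  r4-r11
     *   [sp, #12..#75]   s16-s31
     *   [sp, #4..#11]    two spare words
     *   [sp, #0]         kSaveAll ArtMethod*
     * 36 (GPRs) + 64 (FPRs) + 12 (Method* + spare) = 112 bytes.
     */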

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsOnly).
     */
.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME rTemp1, rTemp2
    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                    @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsOnly)
     * and preserves the value of rTemp2 at entry.
     */
.macro SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2 rTemp1, rTemp2
    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                    @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    str \rTemp2, [sp, #0]                         @ save rTemp2
    RUNTIME_CURRENT2 \rTemp1, \rTemp2             @ Load Runtime::Current into rTemp1.
    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_ONLY_CALLEE_SAVE_FRAME_OFFSET] @ rTemp1 is kRefsOnly Method*.
    ldr \rTemp2, [sp, #0]                         @ restore rTemp2
    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_REFS_ONLY_CALLEE_SAVE != 28 + 4)
#error "REFS_ONLY_CALLEE_SAVE_FRAME(ARM) size not as expected."
#endif
.endm

.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    add sp, #4               @ bottom word holds Method*
    .cfi_adjust_cfa_offset -4
    pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -28
.endm

.macro RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    bx  lr                   @ return
.endm
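
    /*
     * Illustrative usage sketch of the kRefsOnly save/restore pair (the
     * entrypoint name artFooFromCode is hypothetical; the generated downcall
     * macros below follow exactly this shape):
     *     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves in case of GC
     *     mov    r1, r9                              @ pass Thread::Current
     *     bl     artFooFromCode                      @ (arg, Thread*)
     *     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
     *     RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
     */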

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kRefsAndArgs).
     */
.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
    .cfi_adjust_cfa_offset 40
    .cfi_rel_offset r1, 0
    .cfi_rel_offset r2, 4
    .cfi_rel_offset r3, 8
    .cfi_rel_offset r5, 12
    .cfi_rel_offset r6, 16
    .cfi_rel_offset r7, 20
    .cfi_rel_offset r8, 24
    .cfi_rel_offset r10, 28
    .cfi_rel_offset r11, 32
    .cfi_rel_offset lr, 36
    vpush {s0-s15}                     @ 16 words of float args.
    .cfi_adjust_cfa_offset 64
    sub sp, #8                         @ 2 words of space, bottom word will hold Method*
    .cfi_adjust_cfa_offset 8
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE != 40 + 64 + 8)
#error "REFS_AND_ARGS_CALLEE_SAVE_FRAME(ARM) size not as expected."
#endif
.endm

.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME rTemp1, rTemp2
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp1, \rTemp2  @ Load Runtime::Current into rTemp1.
    @ rTemp1 is kRefsAndArgs Method*.
    ldr \rTemp1, [\rTemp1, #RUNTIME_REFS_AND_ARGS_CALLEE_SAVE_FRAME_OFFSET]
    str \rTemp1, [sp, #0]                         @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                   @ Store ArtMethod* to bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    add  sp, #8                      @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {s0-s15}
    .cfi_adjust_cfa_offset -64
    pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves and args
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -40
.endm
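
    /*
     * Resulting kRefsAndArgs frame sketch (offsets follow from the macro
     * above; the total matches the FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE check):
     *   [sp, #108]       lr
     *   [sp, #72..#104]  r1-r3, r5-r8, r10-r11
     *   [sp, #8..#71]    s0-s15
     *   [sp, #4]         spare word
     *   [sp, #0]         ArtMethod*
     * 40 (GPRs) + 64 (FPRs) + 8 (Method* + spare) = 112 bytes.
     */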

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz   r0, 1f              @ result non-zero branch over
    bx     lr                  @ return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz    r0, 1f              @ result zero branch over
    bx     lr                  @ return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    .fnend
    .fnstart
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1    @ save callee saves for throw
    mov    r0, r9                              @ pass Thread::Current
    b      artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1  // save all registers as basis for long jump context
    mov r0, r9                      @ pass Thread::Current
    b   \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r1, r2  // save all registers as basis for long jump context
    mov r1, r9                      @ pass Thread::Current
    b   \cxx_name                   @ \cxx_name(arg, Thread*)
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2, r3  // save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    b   \cxx_name                   @ \cxx_name(arg1, arg2, Thread*)
END \c_name
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [r9, #THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    bx lr
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
.endm

.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_NON_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

// Macros that take advantage of code similarities for downcalls with referrer for non-wide fields.
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case of GC
    ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
    mov    r2, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (uint32_t field_idx, const Method* referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    \return
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
    ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
    mov    r3, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (field_idx, Object*, referrer, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r3, r12  @ save callee saves in case of GC
    ldr    r3, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
    str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     \entrypoint                   @ (field_idx, Object*, new_val, referrer, Thread*)
    add    sp, #16                       @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
    \return
END \name
.endm
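
// Illustrative expansion sketch: art_quick_get32_instance, generated further
// below via TWO_ARG_REF_DOWNCALL, expands to roughly:
//     ENTRY art_quick_get32_instance
//         SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3
//         ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
//         mov    r3, r9                                       @ pass Thread::Current
//         bl     artGet32InstanceFromCode
//         RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
//         RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
//     END art_quick_get32_instance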

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * Called by managed code to create and deliver a NoSuchMethodError.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_throw_no_such_method, artThrowNoSuchMethodFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case allocation triggers GC
    mov    r2, r9                         @ pass Thread::Current
    mov    r3, sp
    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
    mov    r12, r1                        @ save Method*->code_
    RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
    bx     r12                            @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm
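
// Return-value note for the trampolines below: per the AAPCS convention for
// 64-bit returns, the helper's result comes back in the r0/r1 pair, so r0
// carries the resolved Method* and r1 its code_ entrypoint; the body stashes
// r1 in r12, restores the argument registers, and tail-calls r12 with r0 as
// the method argument.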

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   JValue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |          r9             |
     *  |          r4             | <- r11
     *  +-------------------------+
     *  | uint32_t out[n-1]       |
     *  |    :      :             |        Outs
     *  | uint32_t out[0]         |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
    mov    r11, sp                         @ save the stack pointer
    .cfi_def_cfa_register r11

    mov    r9, r3                          @ move managed thread pointer into r9

    add    r4, r2, #4                      @ create space for method pointer in frame
    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
    mov    sp, r4                          @ 16B alignment ourselves.

    mov    r4, r0                          @ save method*
    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
    bl     memcpy                          @ memcpy (dest, src, bytes)
    mov    ip, #0                          @ set ip to 0
    str    ip, [sp]                        @ store null for method* at bottom of frame

    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
    vldm   ip, {s0-s15}                    @ copy s0 - s15

    ldr    ip, [r11, #44]                  @ load core register argument array pointer
    mov    r0, r4                          @ restore method*
    add    ip, ip, #4                      @ skip r0
    ldm    ip, {r1-r3}                     @ copy r1 - r3

#ifdef ARM_R4_SUSPEND_FLAG
    mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
#endif

    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
    blx    ip                              @ call the method

    mov    sp, r11                         @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr    r4, [sp, #40]                   @ load result_in_float
    ldr    r9, [sp, #36]                   @ load the result pointer
    cmp    r4, #0
    ite    eq
    strdeq r0, [r9]                        @ store r0/r1 into result pointer
    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer

    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
END art_quick_invoke_stub_internal
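
    /*
     * Offset sketch for the loads above: SPILL_ALL_CALLEE_SAVE_GPRS pushes
     * 9 words (36 bytes), so the caller's outgoing words land at
     *   [r11, #36] = JValue* result        (also [sp, #36] once sp = r11)
     *   [r11, #40] = result_in_float       (also [sp, #40])
     *   [r11, #44] = core register argument array
     *   [r11, #48] = fp register argument array
     */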

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
    mov    r11, sp                         @ Save the stack pointer
    mov    r10, r1                         @ Save size of stack
    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
    mov    r8, r2                          @ Save the pc to call
    sub    r7, sp, #12                     @ Reserve space for stack pointer,
                                           @    JValue* result, and ArtMethod* slot.
    and    r7, #0xFFFFFFF0                 @ Align stack pointer
    mov    sp, r7                          @ Update stack pointer
    str    r11, [sp, #4]                   @ Save old stack pointer
    str    r3, [sp, #8]                    @ Save JValue* result
    mov    ip, #0
    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
    sub    sp, sp, r1                      @ Reserve space for callee stack
    mov    r2, r1
    mov    r1, r0
    mov    r0, sp
    bl     memcpy                          @ memcpy (dest r0, src r1, bytes r2)
    bl     .Losr_entry                     @ Call the method
    ldr    r10, [sp, #8]                   @ Restore JValue* result
    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
    ldr    r4, [sp, #36]                   @ load shorty
    ldrb   r4, [r4, #0]                    @ load return type
    cmp    r4, #68                         @ Test if result type char == 'D'.
    beq    .Losr_fp_result
    cmp    r4, #70                         @ Test if result type char == 'F'.
    beq    .Losr_fp_result
    strd r0, [r10]                         @ Store r0/r1 into result pointer
    b    .Losr_exit
.Losr_fp_result:
    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
.Losr_exit:
    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    sub r10, r10, #4
    str lr, [sp, r10]                     @ Store link register per the compiler ABI
    bx r8
END art_quick_osr_stub
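
    /*
     * Shorty note for the comparisons above: the first shorty character is
     * the return type, so the code tests for ASCII 'D' (68) and 'F' (70) to
     * select the floating-point result path; all other types return in r0/r1.
     */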

    /*
     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_.
     */
ARM_ENTRY art_quick_do_long_jump
    vldm r1, {s0-s31}     @ load all fprs from argument fprs_
    ldr  r2, [r0, #60]    @ r2 = r15 (PC from gprs_, 60 = 4 * 15)
    ldr  r14, [r0, #56]   @ (LR from gprs_, 56 = 4 * 14)
    add  r0, r0, #12      @ increment r0 to skip gprs_[0..2], 12 = 4 * 3
    ldm  r0, {r3-r13}     @ load remaining gprs from argument gprs_
    ldr  r0, [r0, #-12]   @ load r0 value
    mov  r1, #0           @ clear result register r1
    bx   r2               @ do long jump
END art_quick_do_long_jump
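
    /*
     * Offset sketch: gprs_ holds r0..r15 at 4 bytes each, so PC (r15) is at
     * 4 * 15 = 60 and LR (r14) at 4 * 14 = 56; after r0 is advanced past
     * gprs_[0..2], the original r0 value is reloaded via [r0, #-12].
     */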

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
     * possibly null object to lock.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    cbz    r0, .Lslow_lock
.Lretry_lock:
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    mov    r3, r1
    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
    cbnz   r3, .Lnot_unlocked         @ already thin locked
    @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
    orr    r2, r1, r2                 @ r2 holds thread id with count of 0 with preserved read barrier bits
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    cbnz   r3, .Llock_strex_fail      @ store failed, retry
    dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
    bx lr
.Lnot_unlocked:  @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits
    lsr    r3, r1, LOCK_WORD_STATE_SHIFT
    cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, go slow path
    eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r2, r2                     @ zero top 16 bits
    cbnz   r2, .Lslow_lock            @ thread ids don't match -> contention, go to slow path
                                      @ else ids match -> recursive lock, increment the count
    mov    r3, r1                     @ copy the lock word to check count overflow.
    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits.
    add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
    lsr    r3, r2, LOCK_WORD_READ_BARRIER_STATE_SHIFT  @ if either of the upper two bits (28-29) are set, we overflowed.
    cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
    add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
    cbnz   r3, .Llock_strex_fail      @ strex failed, retry
    bx lr
.Llock_strex_fail:
    b      .Lretry_lock               @ retry
.Lslow_lock:
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object
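
    /*
     * Lock word sketch for the fast path above (a reading aid derived from
     * the checks; the authoritative layout is the LOCK_WORD_* constants):
     *   bits 30-31: state (non-zero after the LOCK_WORD_STATE_SHIFT shift -> slow path)
     *   bits 28-29: read barrier state (masked off via the MASK_TOGGLED constant)
     *   bits 16-27: thin lock recursion count (stepped by LOCK_WORD_THIN_LOCK_COUNT_ONE)
     *   bits 0-15:  owner thread id (compared via eor + uxth)
     */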

ENTRY art_quick_lock_object_no_inline
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2  @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    r0, .Lslow_unlock
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ Need to use atomic instructions for read barrier
#endif
    lsr    r2, r1, #LOCK_WORD_STATE_SHIFT
    cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    mov    r3, r1                     @ copy lock word to check thread id equality
    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
    eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r3, r3                     @ zero top 16 bits
    cbnz   r3, .Lslow_unlock          @ do lock word and self thread id's match?
    mov    r3, r1                     @ copy lock word to detect transition to unlocked
    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED  @ zero the read barrier bits
    cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    @ transition to unlocked
    mov    r3, r1
    and    r3, #LOCK_WORD_READ_BARRIER_STATE_MASK  @ r3: zero except for the preserved read barrier bits
    dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lrecursive_thin_unlock:  @ r1: original lock word
    sub    r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ decrement count
#ifndef USE_READ_BARRIER
    str    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lunlock_strex_fail:
    b      .Lretry_unlock             @ retry
.Lslow_unlock:
    @ save callee saves in case exception allocation triggers GC
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    @ save callee saves in case exception allocation triggers GC
    SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
     * artThrowClassCastException.
     */
    .extern artThrowClassCastException
ENTRY art_quick_check_cast
    push {r0-r1, lr}                    @ save arguments and link register
    .cfi_adjust_cfa_offset 12
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset lr, 8
    sub sp, #4                          @ pad for alignment
    .cfi_adjust_cfa_offset 4
    bl artIsAssignableFromCode
    cbz    r0, .Lthrow_class_cast_exception
    add sp, #4
    .cfi_adjust_cfa_offset -4
    pop {r0-r1, pc}
    .cfi_adjust_cfa_offset 4        @ Reset unwind info so following code unwinds.
.Lthrow_class_cast_exception:
    add sp, #4
    .cfi_adjust_cfa_offset -4
    pop {r0-r1, lr}
    .cfi_adjust_cfa_offset -12
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore lr
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r2, r3  // save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    b   artThrowClassCastException  @ (Class*, Class*, Thread*)
    bkpt
END art_quick_check_cast

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
        ldr \rReg, [sp, #\offset]   @ restore rReg
        .cfi_restore \rReg
    .endif
.endm

    /*
     * Macro to insert a read barrier, only used in art_quick_aput_obj.
     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                      @ push padding
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
        mov r1, \rObj               @ pass rObj
    .endif
    mov r2, #\offset                @ pass offset
    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
        mov \rDest, r0              @ save return value in rDest
    .endif
    add sp, #8                      @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
    POP_REG_NE r1, 4, \rDest
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                        @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
#else
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm

    /*
     * Entry from managed code for array put operations of objects where the value being stored
     * needs to be checked for compatibility.
     * r0 = array, r1 = index, r2 = value
     */
ENTRY art_quick_aput_obj_with_null_and_bound_check
    tst r0, r0
    bne art_quick_aput_obj_with_bound_check
    b art_quick_throw_null_pointer_exception
END art_quick_aput_obj_with_null_and_bound_check

    .hidden art_quick_aput_obj_with_bound_check
ENTRY art_quick_aput_obj_with_bound_check
    ldr r3, [r0, #MIRROR_ARRAY_LENGTH_OFFSET]
    cmp r3, r1
    bhi art_quick_aput_obj
    mov r0, r1
    mov r1, r3
    b art_quick_throw_array_bounds
END art_quick_aput_obj_with_bound_check

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
#ifdef USE_READ_BARRIER
    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from the READ_BARRIER macro.
    tst r2, r2
    beq .Ldo_aput_null
#else
    cbz r2, .Ldo_aput_null
#endif  // USE_READ_BARRIER
    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    cmp r3, ip  @ value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #7
    strb r3, [r3, r0]
    blx lr
.Ldo_aput_null:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr
.Lcheck_assignability:
    push {r0-r2, lr}             @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    mov r1, ip
    mov r0, r3
    bl artIsAssignableFromCode
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #7
    strb r3, [r3, r0]
    blx lr
.Lthrow_array_store_exception:
    pop {r0-r2, lr}
    /* No need to repeat restore cfi directives, the ones above apply here. */
    SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r3, ip
    mov r1, r2
    mov r2, r9                     @ pass Thread::Current
    b artThrowArrayStoreException  @ (Class*, Class*, Thread*)
    bkpt                           @ unreached
END art_quick_aput_obj
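
// Card marking note for the two strb sequences above: the card table base is
// loaded from the thread, the array address is shifted right by 7 (one card
// per 128-byte region, assuming the usual ART card size), and the base byte
// itself is stored as the dirty value, so the store needs no extra constant.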
    822 
    823 // Macro to facilitate adding new allocation entrypoints.
    824 .macro ONE_ARG_DOWNCALL name, entrypoint, return
    825     .extern \entrypoint
    826 ENTRY \name
    827     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r1, r2  @ save callee saves in case of GC
    828     mov    r1, r9                     @ pass Thread::Current
    829     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
    830     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    831     \return
    832 END \name
    833 .endm
    834 
    835 // Macro to facilitate adding new allocation entrypoints.
    836 .macro TWO_ARG_DOWNCALL name, entrypoint, return
    837     .extern \entrypoint
    838 ENTRY \name
    839     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
    840     mov    r2, r9                     @ pass Thread::Current
    841     bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
    842     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    843     \return
    844 END \name
    845 .endm
    846 
    847 // Macro to facilitate adding new array allocation entrypoints.
    848 .macro THREE_ARG_DOWNCALL name, entrypoint, return
    849     .extern \entrypoint
    850 ENTRY \name
    851     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r3, r12  @ save callee saves in case of GC
    852     mov    r3, r9                     @ pass Thread::Current
    853     @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
    854     bl     \entrypoint
    855     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    856     \return
    857 END \name
    858 .endm
    859 
    860 // Macro to facilitate adding new allocation entrypoints.
    861 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
    862     .extern \entrypoint
    863 ENTRY \name
    864     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME_PRESERVE_RTEMP2  r12, r3  @ save callee saves in case of GC
    865     str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
    866     .cfi_adjust_cfa_offset 16
    867     bl     \entrypoint
    868     add    sp, #16                    @ strip the extra frame
    869     .cfi_adjust_cfa_offset -16
    870     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    871     \return
    872 END \name
    873 .endm
    874 
    875 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    876 ONE_ARG_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    877 ONE_ARG_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    878 
    879     /*
    880      * Called by managed code to resolve a static field and load a non-wide value.
    881      */
    882 ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    883 ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    884 ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    885 ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    886 ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    887 ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    888     /*
    889      * Called by managed code to resolve a static field and load a 64-bit primitive value.
    890      */
    891     .extern artGet64StaticFromCode
    892 ENTRY art_quick_get64_static
    893     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ save callee saves in case of GC
    894     ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
    895     mov    r2, r9                        @ pass Thread::Current
    896     bl     artGet64StaticFromCode        @ (uint32_t field_idx, const Method* referrer, Thread*)
    897     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    898     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    899     cbnz   r2, 1f                        @ success if no exception pending
    900     bx     lr                            @ return on success
    901 1:
    902     DELIVER_PENDING_EXCEPTION
    903 END art_quick_get64_static
    904 
    905     /*
    906      * Called by managed code to resolve an instance field and load a non-wide value.
    907      */
    908 TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    909 TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    910 TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    911 TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    912 TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    913 TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    914     /*
    915      * Called by managed code to resolve an instance field and load a 64-bit primitive value.
    916      */
    917     .extern artGet64InstanceFromCode
    918 ENTRY art_quick_get64_instance
    919     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
    920     ldr    r2, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
    921     mov    r3, r9                        @ pass Thread::Current
    922     bl     artGet64InstanceFromCode      @ (field_idx, Object*, referrer, Thread*)
    923     ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    924     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    925     cbnz   r2, 1f                        @ success if no exception pending
    926     bx     lr                            @ return on success
    927 1:
    928     DELIVER_PENDING_EXCEPTION
    929 END art_quick_get64_instance
    930 
    931     /*
    932      * Called by managed code to resolve a static field and store a non-wide value.
    933      */
    934 TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    935 TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    936 TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    937 TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    938     /*
    939      * Called by managed code to resolve a static field and store a 64-bit primitive value.
    940      * On entry r0 holds field index, r2:r3 hold new_val
    941      */
    942     .extern artSet64StaticFromCode
    943 ENTRY art_quick_set64_static
    944     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r12   @ save callee saves in case of GC
    945                                          @ r2:r3 contain the wide argument
    946     ldr    r1, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
    947     str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
    948     .cfi_adjust_cfa_offset 16
    949     bl     artSet64StaticFromCode        @ (field_idx, referrer, new_val, Thread*)
    950     add    sp, #16                       @ release out args
    951     .cfi_adjust_cfa_offset -16
    952     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
    953     RETURN_IF_RESULT_IS_ZERO
    954     DELIVER_PENDING_EXCEPTION
    955 END art_quick_set64_static
    956 
    957     /*
    958      * Called by managed code to resolve an instance field and store a non-wide value.
    959      */
    960 THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    961 THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    962 THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    963 THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    964     /*
    965      * Called by managed code to resolve an instance field and store a 64-bit primitive value.
    966      */
    967     .extern artSet64InstanceFromCode
    968 ENTRY art_quick_set64_instance
    969     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r12, lr  @ save callee saves in case of GC
    970                                          @ r2:r3 contain the wide argument
    971     ldr    r12, [sp, #FRAME_SIZE_REFS_ONLY_CALLEE_SAVE]  @ pass referrer
    972     str    r9, [sp, #-12]!               @ expand the frame and pass Thread::Current
    973     .cfi_adjust_cfa_offset 12
    974     str    r12, [sp, #-4]!               @ expand the frame and pass the referrer
    975     .cfi_adjust_cfa_offset 4
    976     bl     artSet64InstanceFromCode      @ (field_idx, Object*, new_val, Method* referrer, Thread*)
    977     add    sp, #16                       @ release out args
    978     .cfi_adjust_cfa_offset -16
    979     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME  @ TODO: we can clearly save an add here
    980     RETURN_IF_RESULT_IS_ZERO
    981     DELIVER_PENDING_EXCEPTION
    982 END art_quick_set64_instance
    983 
    984     /*
    985      * Entry from managed code to resolve a string, this stub will allocate a String and deliver an
    986      * exception on error. On success the String is returned. R0 holds the string index. The fast
    987      * path check for hit in strings cache has already been performed.
    988      */
    989 ONE_ARG_DOWNCALL art_quick_resolve_string, artResolveStringFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    990 
    991 // Generate the allocation entrypoints for each allocator.
    992 GENERATE_ALLOC_ENTRYPOINTS_FOR_EACH_ALLOCATOR
    993 
    994 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_rosalloc, RosAlloc).
    995 ENTRY art_quick_alloc_object_rosalloc
    996     // Fast path rosalloc allocation.
    997     // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
    998     // r2, r3, r12: free.
    999     ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
   1000                                                               // Load the class (r2)
   1001     ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
   1002     cbz    r2, .Lart_quick_alloc_object_rosalloc_slow_path    // Check null class
   1003                                                               // Check class status.
   1004     ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
   1005     cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
   1006     bne    .Lart_quick_alloc_object_rosalloc_slow_path
   1007                                                               // Add a fake dependence from the
   1008                                                               // following access flag and size
   1009                                                               // loads to the status load.
   1010                                                               // This is to prevent those loads
   1011                                                               // from being reordered above the
   1012                                                               // status load and reading wrong
   1013                                                               // values (an alternative is to use
   1014                                                               // a load-acquire for the status).
   1015     eor    r3, r3, r3
   1016     add    r2, r2, r3
   1017                                                               // Check access flags has
   1018                                                               // kAccClassIsFinalizable
   1019     ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
   1020     tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
   1021     bne    .Lart_quick_alloc_object_rosalloc_slow_path
   1022 
   1023     ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]     // Check if the thread local
   1024                                                               // allocation stack has room.
   1025                                                               // TODO: consider using ldrd.
   1026     ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
   1027     cmp    r3, r12
   1028     bhs    .Lart_quick_alloc_object_rosalloc_slow_path
   1029 
   1030     ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3)
   1031     cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
   1032                                                               // local allocation
   1033     bhs    .Lart_quick_alloc_object_rosalloc_slow_path
   1034                                                               // Compute the rosalloc bracket index
   1035                                                               // from the size.
   1036                                                               // Align up the size by the rosalloc
   1037                                                               // bracket quantum size and divide
   1038                                                               // by the quantum size and subtract
   1039                                                               // by 1. This code is a shorter but
   1040                                                               // equivalent version.
   1041     sub    r3, r3, #1
   1042     lsr    r3, r3, #ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT
   1043                                                               // Load the rosalloc run (r12)
   1044     add    r12, r9, r3, lsl #POINTER_SIZE_SHIFT
   1045     ldr    r12, [r12, #THREAD_ROSALLOC_RUNS_OFFSET]
   1046                                                               // Load the free list head (r3). This
   1047                                                               // will be the return val.
   1048     ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1049     cbz    r3, .Lart_quick_alloc_object_rosalloc_slow_path
   1050     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
   1051     ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
   1052                                                               // and update the list head with the
   1053                                                               // next pointer.
   1054     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1055                                                               // Store the class pointer in the
   1056                                                               // header. This also overwrites the
   1057                                                               // next pointer. The offsets are
   1058                                                               // asserted to match.
   1059 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
   1060 #error "Class pointer needs to overwrite next pointer."
   1061 #endif
   1062     POISON_HEAP_REF r2
   1063     str    r2, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
   1064                                                               // Push the new object onto the thread
   1065                                                               // local allocation stack and
   1066                                                               // increment the thread local
   1067                                                               // allocation stack top.
   1068     ldr    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1069     str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
   1070     str    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1071                                                               // Decrement the size of the free list
   1072     ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1073     sub    r1, #1
   1074                                                               // TODO: consider combining this store
   1075                                                               // and the list head store above using
   1076                                                               // strd.
   1077     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1078                                                               // Fence. This is "ish" not "ishst" so
   1079                                                               // that the code after this allocation
   1080                                                               // site will see the right values in
   1081                                                               // the fields of the class.
   1082                                                               // Alternatively we could use "ishst"
   1083                                                               // if we use load-acquire for the
    1084                                                               // class status load.
   1085     dmb    ish
   1086     mov    r0, r3                                             // Set the return value and return.
   1087     bx     lr
   1088 
   1089 .Lart_quick_alloc_object_rosalloc_slow_path:
   1090     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3  @ save callee saves in case of GC
   1091     mov    r2, r9                     @ pass Thread::Current
   1092     bl     artAllocObjectFromCodeRosAlloc     @ (uint32_t type_idx, Method* method, Thread*)
   1093     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
   1094     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1095 END art_quick_alloc_object_rosalloc
   1096 
   1097 // The common fast path code for art_quick_alloc_object_tlab and art_quick_alloc_object_region_tlab.
   1098 //
   1099 // r0: type_idx/return value, r1: ArtMethod*, r2: class, r9: Thread::Current, r3, r12: free.
    1100 // r0 and r1 must be preserved for the slow path.
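// A rough C sketch of this fast path (field and constant names are
// illustrative, not the real runtime identifiers):
//   if (klass == null || klass->status != kStatusInitialized ||
//       (klass->access_flags & kAccClassIsFinalizable) != 0) goto slow_path;
//   size = RoundUp(klass->object_size, kObjectAlignment);
//   if (size > self->tlab_end - self->tlab_pos) goto slow_path;
//   obj = self->tlab_pos; self->tlab_pos += size; self->tlab_objects += 1;
//   obj->klass = klass;  // then dmb ish and return obj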
   1101 .macro ALLOC_OBJECT_TLAB_FAST_PATH slowPathLabel
   1102     cbz    r2, \slowPathLabel                                 // Check null class
   1103                                                               // Check class status.
   1104     ldr    r3, [r2, #MIRROR_CLASS_STATUS_OFFSET]
   1105     cmp    r3, #MIRROR_CLASS_STATUS_INITIALIZED
   1106     bne    \slowPathLabel
   1107                                                               // Add a fake dependence from the
   1108                                                               // following access flag and size
   1109                                                               // loads to the status load.
   1110                                                               // This is to prevent those loads
   1111                                                               // from being reordered above the
   1112                                                               // status load and reading wrong
   1113                                                               // values (an alternative is to use
   1114                                                               // a load-acquire for the status).
   1115     eor    r3, r3, r3
   1116     add    r2, r2, r3
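    // (The eor zeroes r3, but the result still carries a data dependency on
    // the status load, so the loads through r2 below are address-dependent
    // on it; the ARM memory model keeps such loads ordered.)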
    1117                                                               // Check if the access flags have
    1118                                                               // kAccClassIsFinalizable set.
   1119     ldr    r3, [r2, #MIRROR_CLASS_ACCESS_FLAGS_OFFSET]
   1120     tst    r3, #ACCESS_FLAGS_CLASS_IS_FINALIZABLE
   1121     bne    \slowPathLabel
   1122                                                               // Load thread_local_pos (r12) and
   1123                                                               // thread_local_end (r3) with ldrd.
   1124                                                               // Check constraints for ldrd.
   1125 #if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
    1126 #error "Thread::thread_local_pos/end must be consecutive and 8-byte aligned for performance"
   1127 #endif
   1128     ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
   1129     sub    r12, r3, r12                                       // Compute the remaining buf size.
   1130     ldr    r3, [r2, #MIRROR_CLASS_OBJECT_SIZE_OFFSET]         // Load the object size (r3).
    1131     cmp    r3, r12                                            // Check if it fits. OK to compare
    1132                                                               // before rounding up the object size,
    1133                                                               // as the remaining size is aligned.
   1134     bhi    \slowPathLabel
   1135     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
   1136                                                               // Round up the object size by the
    1137                                                               // object alignment: (size + 7) & ~7.
   1138     add    r3, r3, #OBJECT_ALIGNMENT_MASK
   1139     and    r3, r3, #OBJECT_ALIGNMENT_MASK_TOGGLED
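    // For example, with 8-byte alignment: a size of 12 becomes
    // (12 + 7) & ~7 = 16, while an already-aligned 16 stays 16.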
   1140                                                               // Reload old thread_local_pos (r0)
   1141                                                               // for the return value.
   1142     ldr    r0, [r9, #THREAD_LOCAL_POS_OFFSET]
   1143     add    r1, r0, r3
   1144     str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
   1145     ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
   1146     add    r1, r1, #1
   1147     str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
   1148     POISON_HEAP_REF r2
   1149     str    r2, [r0, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
   1150                                                               // Fence. This is "ish" not "ishst" so
   1151                                                               // that the code after this allocation
   1152                                                               // site will see the right values in
   1153                                                               // the fields of the class.
   1154                                                               // Alternatively we could use "ishst"
   1155                                                               // if we use load-acquire for the
    1156                                                               // class status load.
   1157     dmb    ish
   1158     bx     lr
   1159 .endm
   1160 
   1161 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_tlab, TLAB).
   1162 ENTRY art_quick_alloc_object_tlab
   1163     // Fast path tlab allocation.
   1164     // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current
   1165     // r2, r3, r12: free.
   1166 #if defined(USE_READ_BARRIER)
   1167     mvn    r0, #0                                             // Read barrier not supported here.
   1168     bx     lr                                                 // Return -1.
   1169 #endif
   1170     ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
   1171                                                               // Load the class (r2)
   1172     ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
   1173     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_tlab_slow_path
   1174 .Lart_quick_alloc_object_tlab_slow_path:
   1175     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
   1176     mov    r2, r9                                             // Pass Thread::Current.
   1177     bl     artAllocObjectFromCodeTLAB    // (uint32_t type_idx, Method* method, Thread*)
   1178     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
   1179     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1180 END art_quick_alloc_object_tlab
   1181 
   1182 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT(_region_tlab, RegionTLAB)
   1183 ENTRY art_quick_alloc_object_region_tlab
   1184     // Fast path tlab allocation.
   1185     // r0: type_idx/return value, r1: ArtMethod*, r9: Thread::Current, r2, r3, r12: free.
   1186 #if !defined(USE_READ_BARRIER)
   1187     eor    r0, r0, r0                                         // Read barrier must be enabled here.
   1188     sub    r0, r0, #1                                         // Return -1.
   1189     bx     lr
   1190 #endif
   1191     ldr    r2, [r1, #ART_METHOD_DEX_CACHE_TYPES_OFFSET_32]    // Load dex cache resolved types array
   1192                                                               // Load the class (r2)
   1193     ldr    r2, [r2, r0, lsl #COMPRESSED_REFERENCE_SIZE_SHIFT]
   1194                                                               // Read barrier for class load.
   1195     ldr    r3, [r9, #THREAD_IS_GC_MARKING_OFFSET]
   1196     cbnz   r3, .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path
   1197 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit:
   1198     ALLOC_OBJECT_TLAB_FAST_PATH .Lart_quick_alloc_object_region_tlab_slow_path
   1199 .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path:
   1200                                                               // The read barrier slow path. Mark
   1201                                                               // the class.
    1202     push   {r0, r1, r3, lr}                                   // Save registers. r3 is pushed
    1203                                                               // only to keep sp 16-byte aligned.
   1204     mov    r0, r2                                             // Pass the class as the first param.
   1205     bl     artReadBarrierMark
   1206     mov    r2, r0                                             // Get the (marked) class back.
   1207     pop    {r0, r1, r3, lr}
   1208     b      .Lart_quick_alloc_object_region_tlab_class_load_read_barrier_slow_path_exit
   1209 .Lart_quick_alloc_object_region_tlab_slow_path:
   1210     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME  r2, r3                 // Save callee saves in case of GC.
   1211     mov    r2, r9                                             // Pass Thread::Current.
   1212     bl     artAllocObjectFromCodeRegionTLAB    // (uint32_t type_idx, Method* method, Thread*)
   1213     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
   1214     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1215 END art_quick_alloc_object_region_tlab
   1216 
   1217     /*
   1218      * Called by managed code when the value in rSUSPEND has been decremented to 0.
   1219      */
   1220     .extern artTestSuspendFromCode
   1221 ENTRY art_quick_test_suspend
   1222 #ifdef ARM_R4_SUSPEND_FLAG
   1223     ldrh   r0, [rSELF, #THREAD_FLAGS_OFFSET]
   1224     mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL  @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
   1225     cbnz   r0, 1f                             @ check Thread::Current()->suspend_count_ == 0
   1226     bx     lr                                 @ return if suspend_count_ == 0
   1227 1:
   1228 #endif
   1229     mov    r0, rSELF
   1230     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for GC stack crawl
   1231     @ TODO: save FPRs to enable access in the debugger?
   1232     bl     artTestSuspendFromCode             @ (Thread*)
   1233     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
   1234 END art_quick_test_suspend
   1235 
   1236 ENTRY art_quick_implicit_suspend
   1237     mov    r0, rSELF
   1238     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r1, r2   @ save callee saves for stack crawl
   1239     bl     artTestSuspendFromCode             @ (Thread*)
   1240     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME_AND_RETURN
   1241 END art_quick_implicit_suspend
   1242 
   1243     /*
   1244      * Called by managed code that is attempting to call a method on a proxy class. On entry
   1245      * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
   1246      * frame size of the invoked proxy method agrees with a ref and args callee save frame.
   1247      */
   1248      .extern artQuickProxyInvokeHandler
   1249 ENTRY art_quick_proxy_invoke_handler
   1250     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
   1251     mov     r2, r9                 @ pass Thread::Current
   1252     mov     r3, sp                 @ pass SP
   1253     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
   1254     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1255     // Tear down the callee-save frame. Skip arg registers.
   1256     add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
   1257     .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
   1258     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    1259     cbnz    r2, 1f                 @ exception pending? branch to deliver it
   1260     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
   1261     bx      lr                     @ return on success
   1262 1:
   1263     DELIVER_PENDING_EXCEPTION
   1264 END art_quick_proxy_invoke_handler
   1265 
   1266     /*
   1267      * Called to resolve an imt conflict.
   1268      * r0 is the conflict ArtMethod.
   1269      * r12 is a hidden argument that holds the target interface method's dex method index.
   1270      *
   1271      * Note that this stub writes to r0, r4, and r12.
   1272      */
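    /*
     * Layout sketch (a simplification): the ImtConflictTable behaves like a
     * null-terminated array of (interface_method, target_method) pointer
     * pairs, so the lookup below is roughly:
     *   for (entry = table; entry->interface_method != null; entry++)
     *     if (entry->interface_method == wanted) return entry->target_method;
     *   // not found: fall back to the runtime trampoline
     */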
   1273 ENTRY art_quick_imt_conflict_trampoline
   1274     ldr r4, [sp, #0]  // Load referrer
   1275     ldr r4, [r4, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_32]   // Load dex cache methods array
   1276     ldr r12, [r4, r12, lsl #POINTER_SIZE_SHIFT]  // Load interface method
   1277     ldr r0, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
   1278     ldr r4, [r0]  // Load first entry in ImtConflictTable.
   1279 .Limt_table_iterate:
   1280     cmp r4, r12
    1281     // Branch if found. Benchmarks have shown that branching here performs better.
   1282     beq .Limt_table_found
   1283     // If the entry is null, the interface method is not in the ImtConflictTable.
   1284     cbz r4, .Lconflict_trampoline
   1285     // Iterate over the entries of the ImtConflictTable.
   1286     ldr r4, [r0, #(2 * __SIZEOF_POINTER__)]!
   1287     b .Limt_table_iterate
   1288 .Limt_table_found:
   1289     // We successfully hit an entry in the table. Load the target method
   1290     // and jump to it.
   1291     ldr r0, [r0, #__SIZEOF_POINTER__]
   1292     ldr pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
   1293 .Lconflict_trampoline:
   1294     // Call the runtime stub to populate the ImtConflictTable and jump to the
   1295     // resolved method.
   1296     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
   1297 END art_quick_imt_conflict_trampoline
   1298 
   1299     .extern artQuickResolutionTrampoline
   1300 ENTRY art_quick_resolution_trampoline
   1301     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
   1302     mov     r2, r9                 @ pass Thread::Current
   1303     mov     r3, sp                 @ pass SP
   1304     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
    1305     cbz     r0, 1f                 @ null code pointer? deliver pending exception
   1306     mov     r12, r0
   1307     ldr  r0, [sp, #0]              @ load resolved method in r0
   1308     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
   1309     bx      r12                    @ tail-call into actual code
   1310 1:
   1311     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
   1312     DELIVER_PENDING_EXCEPTION
   1313 END art_quick_resolution_trampoline
   1314 
   1315     /*
   1316      * Called to do a generic JNI down-call
   1317      */
   1318 ENTRY art_quick_generic_jni_trampoline
   1319     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME_WITH_METHOD_IN_R0
   1320 
   1321     // Save rSELF
   1322     mov r11, rSELF
    1323     // Save SP, so we can have static CFI info. r10 is saved in ref_and_args.
   1324     mov r10, sp
   1325     .cfi_def_cfa_register r10
   1326 
   1327     sub sp, sp, #5120
   1328 
   1329     // prepare for artQuickGenericJniTrampoline call
   1330     // (Thread*,  SP)
   1331     //    r0      r1   <= C calling convention
   1332     //  rSELF     r10  <= where they are
   1333 
   1334     mov r0, rSELF   // Thread*
   1335     mov r1, r10
   1336     blx artQuickGenericJniTrampoline  // (Thread*, sp)
   1337 
   1338     // The C call will have registered the complete save-frame on success.
   1339     // The result of the call is:
   1340     // r0: pointer to native code, 0 on error.
    1341     // r1: pointer to the bottom of the used area of the alloca; the stack can be restored to there.
   1342 
    1343     // Check for error (a null code pointer means an exception is pending).
   1344     cbz r0, .Lexception_in_native
   1345 
   1346     // Release part of the alloca.
   1347     mov sp, r1
   1348 
   1349     // Save the code pointer
   1350     mov r12, r0
   1351 
   1352     // Load parameters from frame into registers.
   1353     pop {r0-r3}
   1354 
   1355     // Softfloat.
   1356     // TODO: Change to hardfloat when supported.
   1357 
   1358     blx r12           // native call.
   1359 
   1360     // result sign extension is handled in C code
   1361     // prepare for artQuickGenericJniEndTrampoline call
   1362     // (Thread*, result, result_f)
   1363     //    r0      r2,r3    stack       <= C calling convention
   1364     //    r11     r0,r1    r0,r1          <= where they are
   1365     sub sp, sp, #8 // Stack alignment.
   1366 
   1367     push {r0-r1}
   1368     mov r3, r1
   1369     mov r2, r0
   1370     mov r0, r11
   1371 
   1372     blx artQuickGenericJniEndTrampoline
   1373 
   1374     // Restore self pointer.
   1375     mov r9, r11
   1376 
   1377     // Pending exceptions possible.
   1378     ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1379     cbnz r2, .Lexception_in_native
   1380 
   1381     // Tear down the alloca.
   1382     mov sp, r10
   1383     .cfi_def_cfa_register sp
   1384 
   1385     // Tear down the callee-save frame. Skip arg registers.
   1386     add     sp, #FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
   1387     .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
   1388     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
   1389 
   1390     // store into fpr, for when it's a fpr return...
   1391     vmov d0, r0, r1
   1392     bx lr      // ret
   1393     // Undo the unwinding information from above since it doesn't apply below.
   1394     .cfi_def_cfa_register r10
   1395     .cfi_adjust_cfa_offset FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE-FRAME_SIZE_REFS_ONLY_CALLEE_SAVE
   1396 
   1397 .Lexception_in_native:
   1398     ldr sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
   1399     .cfi_def_cfa_register sp
    1400     @ This will create a new save-all frame, required by the runtime.
   1401     DELIVER_PENDING_EXCEPTION
   1402 END art_quick_generic_jni_trampoline
   1403 
   1404     .extern artQuickToInterpreterBridge
   1405 ENTRY art_quick_to_interpreter_bridge
   1406     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r1, r2
   1407     mov     r1, r9                 @ pass Thread::Current
   1408     mov     r2, sp                 @ pass SP
   1409     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
   1410     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1411     // Tear down the callee-save frame. Skip arg registers.
   1412     add     sp, #(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
   1413     .cfi_adjust_cfa_offset -(FRAME_SIZE_REFS_AND_ARGS_CALLEE_SAVE - FRAME_SIZE_REFS_ONLY_CALLEE_SAVE)
   1414     RESTORE_REFS_ONLY_CALLEE_SAVE_FRAME
    1415     cbnz    r2, 1f                 @ exception pending? branch to deliver it
   1416     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
   1417     bx      lr                     @ return on success
   1418 1:
   1419     DELIVER_PENDING_EXCEPTION
   1420 END art_quick_to_interpreter_bridge
   1421 
   1422     /*
   1423      * Routine that intercepts method calls and returns.
   1424      */
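    /*
     * Flow sketch: the entry stub asks the runtime which code to run, then
     * calls it with lr aimed at art_quick_instrumentation_exit so the exit
     * stub runs when the instrumented method returns. The exit call hands
     * back the real return address (in r0) and, if we are to deoptimize,
     * the link register to bounce with (in r1).
     */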
   1425     .extern artInstrumentationMethodEntryFromCode
   1426     .extern artInstrumentationMethodExitFromCode
   1427 ENTRY art_quick_instrumentation_entry
   1428     @ Make stack crawlable and clobber r2 and r3 (post saving)
   1429     SETUP_REFS_AND_ARGS_CALLEE_SAVE_FRAME r2, r3
   1430     @ preserve r0 (not normally an arg) knowing there is a spare slot in kRefsAndArgs.
   1431     str   r0, [sp, #4]
   1432     mov   r2, r9         @ pass Thread::Current
   1433     mov   r3, lr         @ pass LR
   1434     blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, LR)
   1435     mov   r12, r0        @ r12 holds reference to code
   1436     ldr   r0, [sp, #4]   @ restore r0
   1437     RESTORE_REFS_AND_ARGS_CALLEE_SAVE_FRAME
   1438     blx   r12            @ call method with lr set to art_quick_instrumentation_exit
   1439 @ Deliberate fall-through into art_quick_instrumentation_exit.
   1440     .type art_quick_instrumentation_exit, #function
   1441     .global art_quick_instrumentation_exit
   1442 art_quick_instrumentation_exit:
    1443     mov   lr, #0         @ lr points here; clobber it with 0 for later checks
   1444     SETUP_REFS_ONLY_CALLEE_SAVE_FRAME r2, r3  @ set up frame knowing r2 and r3 must be dead on exit
   1445     mov   r12, sp        @ remember bottom of caller's frame
   1446     push  {r0-r1}        @ save return value
   1447     .cfi_adjust_cfa_offset 8
   1448     .cfi_rel_offset r0, 0
   1449     .cfi_rel_offset r1, 4
   1450     vpush {d0}           @ save fp return value
   1451     .cfi_adjust_cfa_offset 8
   1452     sub   sp, #8         @ space for return value argument. Note: AAPCS stack alignment is 8B, no
   1453                          @ need to align by 16.
   1454     .cfi_adjust_cfa_offset 8
   1455     vstr  d0, [sp]       @ d0 -> [sp] for fpr_res
   1456     mov   r2, r0         @ pass return value as gpr_res
   1457     mov   r3, r1
   1458     mov   r0, r9         @ pass Thread::Current
   1459     mov   r1, r12        @ pass SP
   1460     blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res, fpr_res)
   1461     add   sp, #8
   1462     .cfi_adjust_cfa_offset -8
   1463 
    1464     mov   r2, r0         @ r0 holds the return address saved by instrumentation
    1465     mov   lr, r1         @ r1 holds the link register to use if we bounce to deoptimize
   1466     vpop  {d0}           @ restore fp return value
   1467     .cfi_adjust_cfa_offset -8
   1468     pop   {r0, r1}       @ restore return value
   1469     .cfi_adjust_cfa_offset -8
   1470     .cfi_restore r0
   1471     .cfi_restore r1
   1472     add sp, #32          @ remove callee save frame
   1473     .cfi_adjust_cfa_offset -32
   1474     bx    r2             @ return
   1475 END art_quick_instrumentation_entry
   1476 
   1477     /*
   1478      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
   1479      * will long jump to the upcall with a special exception of -1.
   1480      */
   1481     .extern artDeoptimize
   1482 ENTRY art_quick_deoptimize
   1483     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
   1484     mov    r0, r9         @ Set up args.
   1485     blx    artDeoptimize  @ artDeoptimize(Thread*)
   1486 END art_quick_deoptimize
   1487 
   1488     /*
   1489      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
   1490      * will long jump to the interpreter bridge.
   1491      */
   1492     .extern artDeoptimizeFromCompiledCode
   1493 ENTRY art_quick_deoptimize_from_compiled_code
   1494     SETUP_SAVE_ALL_CALLEE_SAVE_FRAME r0, r1
   1495     mov    r0, r9                         @ Set up args.
   1496     blx    artDeoptimizeFromCompiledCode  @ artDeoptimizeFromCompiledCode(Thread*)
   1497 END art_quick_deoptimize_from_compiled_code
   1498 
   1499     /*
   1500      * Signed 64-bit integer multiply.
   1501      *
   1502      * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
   1503      *        WX
   1504      *      x YZ
   1505      *  --------
   1506      *     ZW ZX
   1507      *  YW YX
   1508      *
   1509      * The low word of the result holds ZX, the high word holds
   1510      * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
   1511      * it doesn't fit in the low 64 bits.
   1512      *
   1513      * Unlike most ARM math operations, multiply instructions have
   1514      * restrictions on using the same register more than once (Rd and Rm
   1515      * cannot be the same).
   1516      */
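    /*
     * Worked example with hypothetical 16-bit "words" so the numbers stay
     * small: 0x0001_0002 x 0x0003_0004 has ZX = 4x2 = 8 in the low word and
     * ZW + YX = 4x1 + 3x2 = 0xA in the high word, giving 0x000A_0008, the
     * same placement the umull/mla/add sequence below computes with 32-bit
     * words.
     */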
   1517     /* mul-long vAA, vBB, vCC */
   1518 ENTRY art_quick_mul_long
   1519     push    {r9 - r10}
   1520     .cfi_adjust_cfa_offset 8
   1521     .cfi_rel_offset r9, 0
   1522     .cfi_rel_offset r10, 4
   1523     mul     ip, r2, r1                  @  ip<- ZxW
   1524     umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
   1525     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
   1526     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
    1527     mov     r0, r9
    1528     mov     r1, r10
   1529     pop     {r9 - r10}
   1530     .cfi_adjust_cfa_offset -8
   1531     .cfi_restore r9
   1532     .cfi_restore r10
   1533     bx      lr
   1534 END art_quick_mul_long
   1535 
   1536     /*
   1537      * Long integer shift.  This is different from the generic 32/64-bit
   1538      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1539      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1540      * 6 bits.
   1541      * On entry:
   1542      *   r0: low word
   1543      *   r1: high word
   1544      *   r2: shift count
   1545      */
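    /*
     * Case sketch for (hi:lo) << n with n already masked to [0, 63]:
     *   n < 32:  hi' = (hi << n) | (lo >> (32 - n)),  lo' = lo << n
     *   n >= 32: hi' = lo << (n - 32),                lo' = 0
     * The movpl selects the second case when n - 32 >= 0; register-specified
     * shifts of 32 or more yield 0 on ARM, which also makes the n = 0 edge
     * case fall out of the orr naturally.
     */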
   1546     /* shl-long vAA, vBB, vCC */
   1547 ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
   1548     and     r2, r2, #63                 @ r2<- r2 & 0x3f
   1549     mov     r1, r1, asl r2              @  r1<- r1 << r2
   1550     rsb     r3, r2, #32                 @  r3<- 32 - r2
    1551     orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
   1552     subs    ip, r2, #32                 @  ip<- r2 - 32
   1553     movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
   1554     mov     r0, r0, asl r2              @  r0<- r0 << r2
   1555     bx      lr
   1556 END art_quick_shl_long
   1557 
   1558     /*
   1559      * Long integer shift.  This is different from the generic 32/64-bit
   1560      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1561      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1562      * 6 bits.
   1563      * On entry:
   1564      *   r0: low word
   1565      *   r1: high word
   1566      *   r2: shift count
   1567      */
   1568     /* shr-long vAA, vBB, vCC */
   1569 ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
    1570     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    1571     mov     r0, r0, lsr r2              @  r0<- r0 >> r2
   1572     rsb     r3, r2, #32                 @  r3<- 32 - r2
   1573     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
   1574     subs    ip, r2, #32                 @  ip<- r2 - 32
   1575     movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
   1576     mov     r1, r1, asr r2              @  r1<- r1 >> r2
   1577     bx      lr
   1578 END art_quick_shr_long
   1579 
   1580     /*
   1581      * Long integer shift.  This is different from the generic 32/64-bit
   1582      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1583      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1584      * 6 bits.
   1585      * On entry:
   1586      *   r0: low word
   1587      *   r1: high word
   1588      *   r2: shift count
   1589      */
   1590     /* ushr-long vAA, vBB, vCC */
   1591 ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
    1592     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    1593     mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
   1594     rsb     r3, r2, #32                 @  r3<- 32 - r2
   1595     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
   1596     subs    ip, r2, #32                 @  ip<- r2 - 32
   1597     movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
   1598     mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
   1599     bx      lr
   1600 END art_quick_ushr_long
   1601 
   1602     /*
   1603      * String's indexOf.
   1604      *
   1605      * On entry:
   1606      *    r0:   string object (known non-null)
   1607      *    r1:   char to match (known <= 0xFFFF)
   1608      *    r2:   Starting offset in string data
   1609      */
   1610 ENTRY art_quick_indexof
   1611     push {r4, r10-r11, lr} @ 4 words of callee saves
   1612     .cfi_adjust_cfa_offset 16
   1613     .cfi_rel_offset r4, 0
   1614     .cfi_rel_offset r10, 4
   1615     .cfi_rel_offset r11, 8
   1616     .cfi_rel_offset lr, 12
   1617     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
   1618     add   r0, #MIRROR_STRING_VALUE_OFFSET
   1619 
   1620     /* Clamp start to [0..count] */
   1621     cmp   r2, #0
   1622     it    lt
   1623     movlt r2, #0
   1624     cmp   r2, r3
   1625     it    gt
   1626     movgt r2, r3
   1627 
   1628     /* Save a copy in r12 to later compute result */
   1629     mov   r12, r0
   1630 
   1631     /* Build pointer to start of data to compare and pre-bias */
   1632     add   r0, r0, r2, lsl #1
   1633     sub   r0, #2
   1634 
   1635     /* Compute iteration count */
   1636     sub   r2, r3, r2
   1637 
   1638     /*
   1639      * At this point we have:
   1640      *   r0: start of data to test
   1641      *   r1: char to compare
   1642      *   r2: iteration count
   1643      *   r12: original start of string data
   1644      *   r3, r4, r10, r11 available for loading string data
   1645      */
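    /*
     * Equivalent C sketch of the search (names are illustrative):
     *   for (int i = start; i < count; ++i)
     *     if (data[i] == ch) return i;
     *   return -1;
     * The loop below is unrolled 4x; on a hit, r0 points at the last element
     * loaded by the group, so each .Lmatch_* label rewinds r0 to the matching
     * element before converting the byte offset from r12 into a char index.
     */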
   1646 
   1647     subs  r2, #4
   1648     blt   .Lindexof_remainder
   1649 
   1650 .Lindexof_loop4:
   1651     ldrh  r3, [r0, #2]!
   1652     ldrh  r4, [r0, #2]!
   1653     ldrh  r10, [r0, #2]!
   1654     ldrh  r11, [r0, #2]!
   1655     cmp   r3, r1
   1656     beq   .Lmatch_0
   1657     cmp   r4, r1
   1658     beq   .Lmatch_1
   1659     cmp   r10, r1
   1660     beq   .Lmatch_2
   1661     cmp   r11, r1
   1662     beq   .Lmatch_3
   1663     subs  r2, #4
   1664     bge   .Lindexof_loop4
   1665 
   1666 .Lindexof_remainder:
   1667     adds  r2, #4
   1668     beq   .Lindexof_nomatch
   1669 
   1670 .Lindexof_loop1:
   1671     ldrh  r3, [r0, #2]!
   1672     cmp   r3, r1
   1673     beq   .Lmatch_3
   1674     subs  r2, #1
   1675     bne   .Lindexof_loop1
   1676 
   1677 .Lindexof_nomatch:
   1678     mov   r0, #-1
   1679     pop {r4, r10-r11, pc}
   1680 
   1681 .Lmatch_0:
   1682     sub   r0, #6
   1683     sub   r0, r12
   1684     asr   r0, r0, #1
   1685     pop {r4, r10-r11, pc}
   1686 .Lmatch_1:
   1687     sub   r0, #4
   1688     sub   r0, r12
   1689     asr   r0, r0, #1
   1690     pop {r4, r10-r11, pc}
   1691 .Lmatch_2:
   1692     sub   r0, #2
   1693     sub   r0, r12
   1694     asr   r0, r0, #1
   1695     pop {r4, r10-r11, pc}
   1696 .Lmatch_3:
   1697     sub   r0, r12
   1698     asr   r0, r0, #1
   1699     pop {r4, r10-r11, pc}
   1700 END art_quick_indexof
   1701 
    1702     /*
    1703      * String's compareTo.
    1704      *
    1705      * Requires rARG0/rARG1 to have been previously checked for null. Will
    1706      * return a negative value if this string is < comp, 0 if they are the
    1707      * same, and a positive value if >.
   1708      *
   1709      * On entry:
   1710      *    r0:   this object pointer
   1711      *    r1:   comp object pointer
   1712      *
   1713      */
   1714     .extern __memcmp16
   1715 ENTRY art_quick_string_compareto
   1716     mov    r2, r0         @ this to r2, opening up r0 for return value
    1717     sub    r0, r2, r1     @ Same object?
    1718     cbnz   r0, 1f         @ different objects: compare contents below
    1719     bx     lr             @ same object, so equal: return 0
    1720 1:
   1721 
   1722     push {r4, r7-r12, lr} @ 8 words - keep alignment
   1723     .cfi_adjust_cfa_offset 32
   1724     .cfi_rel_offset r4, 0
   1725     .cfi_rel_offset r7, 4
   1726     .cfi_rel_offset r8, 8
   1727     .cfi_rel_offset r9, 12
   1728     .cfi_rel_offset r10, 16
   1729     .cfi_rel_offset r11, 20
   1730     .cfi_rel_offset r12, 24
   1731     .cfi_rel_offset lr, 28
   1732 
   1733     ldr    r7, [r2, #MIRROR_STRING_COUNT_OFFSET]
   1734     ldr    r10, [r1, #MIRROR_STRING_COUNT_OFFSET]
   1735     add    r2, #MIRROR_STRING_VALUE_OFFSET
   1736     add    r1, #MIRROR_STRING_VALUE_OFFSET
   1737 
   1738     /*
   1739      * At this point, we have:
   1740      *    value:  r2/r1
   1742      *    count:  r7/r10
   1743      * We're going to compute
   1744      *    r11 <- countDiff
   1745      *    r10 <- minCount
   1746      */
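     /*
      * Equivalent C sketch (names are illustrative):
      *   min = Min(this->count, comp->count);
      *   for (i = 0; i < min; ++i)
      *     if (this->value[i] != comp->value[i])
      *       return this->value[i] - comp->value[i];
      *   return this->count - comp->count;   // countDiff
      */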
   1747      subs  r11, r7, r10
   1748      it    ls
   1749      movls r10, r7
   1750 
   1751      /*
   1752       * Note: data pointers point to previous element so we can use pre-index
   1753       * mode with base writeback.
   1754       */
   1755      subs  r2, #2   @ offset to contents[-1]
   1756      subs  r1, #2   @ offset to contents[-1]
   1757 
   1758      /*
   1759       * At this point we have:
   1760       *   r2: *this string data
   1761       *   r1: *comp string data
   1762       *   r10: iteration count for comparison
      *   r11: value to return if the strings are equal up to minCount
   1764       *   r0: reserved for result
   1765       *   r3, r4, r7, r8, r9, r12 available for loading string data
   1766       */
   1767 
   1768     subs  r10, #2
   1769     blt   .Ldo_remainder2
   1770 
   1771       /*
   1772        * Unroll the first two checks so we can quickly catch early mismatch
   1773        * on long strings (but preserve incoming alignment)
   1774        */
   1775 
   1776     ldrh  r3, [r2, #2]!
   1777     ldrh  r4, [r1, #2]!
   1778     ldrh  r7, [r2, #2]!
   1779     ldrh  r8, [r1, #2]!
   1780     subs  r0, r3, r4
   1781     it    eq
   1782     subseq  r0, r7, r8
   1783     bne   .Ldone
   1784     cmp   r10, #28
   1785     bgt   .Ldo_memcmp16
   1786     subs  r10, #3
   1787     blt   .Ldo_remainder
   1788 
   1789 .Lloopback_triple:
   1790     ldrh  r3, [r2, #2]!
   1791     ldrh  r4, [r1, #2]!
   1792     ldrh  r7, [r2, #2]!
   1793     ldrh  r8, [r1, #2]!
   1794     ldrh  r9, [r2, #2]!
   1795     ldrh  r12,[r1, #2]!
   1796     subs  r0, r3, r4
   1797     it    eq
   1798     subseq  r0, r7, r8
   1799     it    eq
   1800     subseq  r0, r9, r12
   1801     bne   .Ldone
   1802     subs  r10, #3
   1803     bge   .Lloopback_triple
   1804 
   1805 .Ldo_remainder:
   1806     adds  r10, #3
   1807     beq   .Lreturn_diff
   1808 
   1809 .Lloopback_single:
   1810     ldrh  r3, [r2, #2]!
   1811     ldrh  r4, [r1, #2]!
   1812     subs  r0, r3, r4
   1813     bne   .Ldone
   1814     subs  r10, #1
   1815     bne   .Lloopback_single
   1816 
   1817 .Lreturn_diff:
   1818     mov   r0, r11
   1819     pop   {r4, r7-r12, pc}
   1820 
   1821 .Ldo_remainder2:
   1822     adds  r10, #2
   1823     bne   .Lloopback_single
   1824     mov   r0, r11
   1825     pop   {r4, r7-r12, pc}
   1826 
   1827     /* Long string case */
   1828 .Ldo_memcmp16:
   1829     mov   r7, r11
   1830     add   r0, r2, #2
   1831     add   r1, r1, #2
   1832     mov   r2, r10
   1833     bl    __memcmp16
   1834     cmp   r0, #0
   1835     it    eq
   1836     moveq r0, r7
   1837 .Ldone:
   1838     pop   {r4, r7-r12, pc}
   1839 END art_quick_string_compareto
   1840 
   1841     /* Assembly routines used to handle ABI differences. */
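    /*
     * Managed code keeps float/double values in VFP registers (s0/d0),
     * while the C helpers called here use the soft-float AAPCS and expect
     * them in core registers; hence the vmov shuffling in the stubs below.
     */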
   1842 
   1843     /* double fmod(double a, double b) */
   1844     .extern fmod
   1845 ENTRY art_quick_fmod
   1846     push  {lr}
   1847     .cfi_adjust_cfa_offset 4
   1848     .cfi_rel_offset lr, 0
   1849     sub   sp, #4
   1850     .cfi_adjust_cfa_offset 4
   1851     vmov  r0, r1, d0
   1852     vmov  r2, r3, d1
   1853     bl    fmod
   1854     vmov  d0, r0, r1
   1855     add   sp, #4
   1856     .cfi_adjust_cfa_offset -4
   1857     pop   {pc}
   1858 END art_quick_fmod
   1859 
   1860     /* float fmodf(float a, float b) */
    1861     .extern fmodf
   1862 ENTRY art_quick_fmodf
   1863     push  {lr}
   1864     .cfi_adjust_cfa_offset 4
   1865     .cfi_rel_offset lr, 0
   1866     sub   sp, #4
   1867     .cfi_adjust_cfa_offset 4
   1868     vmov  r0, r1, d0
   1869     bl    fmodf
   1870     vmov  s0, r0
   1871     add   sp, #4
   1872     .cfi_adjust_cfa_offset -4
   1873     pop   {pc}
    1874 END art_quick_fmodf
   1875 
   1876     /* int64_t art_d2l(double d) */
   1877     .extern art_d2l
   1878 ENTRY art_quick_d2l
   1879     vmov  r0, r1, d0
   1880     b     art_d2l
   1881 END art_quick_d2l
   1882 
   1883     /* int64_t art_f2l(float f) */
   1884     .extern art_f2l
   1885 ENTRY art_quick_f2l
   1886     vmov  r0, s0
   1887     b     art_f2l
   1888 END art_quick_f2l
   1889 
   1890     /* float art_l2f(int64_t l) */
   1891     .extern art_l2f
   1892 ENTRY art_quick_l2f
   1893     push  {lr}
   1894     .cfi_adjust_cfa_offset 4
   1895     .cfi_rel_offset lr, 0
   1896     sub   sp, #4
   1897     .cfi_adjust_cfa_offset 4
   1898     bl    art_l2f
   1899     vmov  s0, r0
   1900     add   sp, #4
   1901     .cfi_adjust_cfa_offset -4
   1902     pop   {pc}
   1903 END art_quick_l2f
   1904