/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"

#include "arch/quick_alloc_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    .extern artDeliverPendingExceptionFromCode

    /*
     * Macro to spill the GPRs.
     */
.macro SPILL_ALL_CALLEE_SAVE_GPRS
    push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
    .cfi_adjust_cfa_offset 36
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset r6, 8
    .cfi_rel_offset r7, 12
    .cfi_rel_offset r8, 16
    .cfi_rel_offset r9, 20
    .cfi_rel_offset r10, 24
    .cfi_rel_offset r11, 28
    .cfi_rel_offset lr, 32
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves).
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
    SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
    vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
    .cfi_adjust_cfa_offset 64
    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
    .cfi_adjust_cfa_offset 12
    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveAllCalleeSaves Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
#endif
.endm
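
// For reference, a sketch of the 112-byte kSaveAllCalleeSaves frame built above
// (36 + 64 + 12 bytes, a multiple of 16), with offsets derived from the pushes:
//
//     [sp, #76..#108]   r4-r11, lr   (9 words of GPR callee saves)
//     [sp, #12..#72]    s16-s31      (16 words of FP callee saves)
//     [sp, #4..#8]      2 words of padding
//     [sp, #0]          ArtMethod*   (the kSaveAllCalleeSaves runtime method)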

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                    @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsOnly Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
#endif
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME
    add sp, #4               @ bottom word holds Method*
    .cfi_adjust_cfa_offset -4
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -28
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
    .cfi_adjust_cfa_offset 40
    .cfi_rel_offset r1, 0
    .cfi_rel_offset r2, 4
    .cfi_rel_offset r3, 8
    .cfi_rel_offset r5, 12
    .cfi_rel_offset r6, 16
    .cfi_rel_offset r7, 20
    .cfi_rel_offset r8, 24
    .cfi_rel_offset r10, 28
    .cfi_rel_offset r11, 32
    .cfi_rel_offset lr, 36
    vpush {s0-s15}                     @ 16 words of float args.
    .cfi_adjust_cfa_offset 64
    sub sp, #8                         @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
#endif
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    add  sp, #8                      @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {s0-s15}
    .cfi_adjust_cfa_offset -64
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves and args.
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -40
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when core registers are already saved.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp
                                        @ 14 words of callee saves and args already saved.
    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
    .cfi_adjust_cfa_offset 128
    sub sp, #8                          @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
    @ Load kSaveEverything Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]
    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
#endif
.endm
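
// For reference: the 14 core words (56 bytes) pushed by callers of this macro,
// plus d0-d15 (128 bytes), plus 2 words (8 bytes) of padding and Method*, give
// the 192-byte, 16-byte-aligned kSaveEverything frame checked above.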

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
     */
.macro SETUP_SAVE_EVERYTHING_FRAME rTemp
    push {r0-r12, lr}                   @ 14 words of callee saves and args.
    .cfi_adjust_cfa_offset 56
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48
    .cfi_rel_offset lr, 52
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    pop {r0-r12, lr}                    @ 14 words of callee saves
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -56
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    add  sp, #4                         @ skip r0
    .cfi_adjust_cfa_offset -4
    .cfi_restore r0                     @ debugger can no longer restore caller's r0
    pop {r1-r12, lr}                    @ 13 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -52
.endm

// Macro to refresh the Marking Register (R8).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm
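
// Note: rSELF and rMR are register aliases expected to be defined in
// asm_support_arm.S: rSELF is r9 (Thread::Current(), as used throughout this
// file) and, when Baker read barriers are enabled, rMR is r8, caching
// Thread::Current()->is_gc_marking.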

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz   r0, 1f              @ result non-zero branch over
    bx     lr                  @ return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz    r0, 1f              @ result zero branch over
    bx     lr                  @ return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov    r0, r9                              @ pass Thread::Current
    bl     artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
    mov r0, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm
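
// Note: the artThrow*/artDeliver* helpers invoked by these exception macros do
// not return; they transfer control via a long jump, which is why no epilogue
// follows the bl.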

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r0  @ save all registers as basis for long jump context
    mov r0, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
    mov r1, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r2  @ save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [r9, #THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    bx lr
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro  RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
.endm

.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_NON_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

// Macros that exploit the code similarities between the downcall stubs.
.macro  ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
    mov    r1, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (uint32_t field_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro  TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r2, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (field_idx, Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
    mov    r3, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (field_idx, Object*, new_val, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
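
// For illustration, a sketch of how one of these downcall macros expands: the
// instantiation `ONE_ARG_REF_DOWNCALL art_quick_get32_static,
// artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1` used
// below becomes, roughly:
//
//     ENTRY art_quick_get32_static
//         SETUP_SAVE_REFS_ONLY_FRAME r1            @ make the stack walkable for GC
//         mov    r1, r9                            @ pass Thread::Current
//         bl     artGet32StaticFromCompiledCode    @ (uint32_t field_idx, Thread*)
//         RESTORE_SAVE_REFS_ONLY_FRAME
//         REFRESH_MARKING_REGISTER
//         RETURN_OR_DELIVER_PENDING_EXCEPTION_R1   @ return, or deliver pending exception
//     END art_quick_get32_static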

    /*
     * Called by managed code, saves callee saves and then calls
     * artDeliverExceptionFromCode, which will place a mock Method* at the bottom
     * of the stack. R0 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
    .cfi_adjust_cfa_offset 52
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48

    @ save all registers as basis for long jump context
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
    mov r1, r9                      @ pass Thread::Current
    bl  artThrowNullPointerExceptionFromSignal  @ (fault_address, Thread*)
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null and there will be a pending exception in
     * the thread; we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Clobbers IP (R12).
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
    mov    r2, r9                         @ pass Thread::Current
    mov    r3, sp
    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
    mov    r12, r1                        @ save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
    bx     r12                            @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   JValue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |    :      :             |        Spilled r4-r11, lr (9 words)
     *  |          r4             | <- r11
     *  +-------------------------+
     *  | uint32_t out[n-1]       |
     *  |    :      :             |        Outs
     *  | uint32_t out[0]         |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
    mov    r11, sp                         @ save the stack pointer
    .cfi_def_cfa_register r11

    mov    r9, r3                          @ move managed thread pointer into r9

    add    r4, r2, #4                      @ create space for method pointer in frame
    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
    mov    sp, r4                          @ 16B alignment ourselves.

    mov    r4, r0                          @ save method*
    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
    bl     memcpy                          @ memcpy (dest, src, bytes)
    mov    ip, #0                          @ set ip to 0
    str    ip, [sp]                        @ store null for method* at bottom of frame

    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
    vldm   ip, {s0-s15}                    @ copy s0 - s15

    ldr    ip, [r11, #44]                  @ load core register argument array pointer
    mov    r0, r4                          @ restore method*
    add    ip, ip, #4                      @ skip r0
    ldm    ip, {r1-r3}                     @ copy r1 - r3

#ifdef ARM_R4_SUSPEND_FLAG
    mov    r4, #SUSPEND_CHECK_INTERVAL     @ reset r4 to suspend check interval
#endif

    REFRESH_MARKING_REGISTER

    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
    blx    ip                              @ call the method

    mov    sp, r11                         @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr    r4, [sp, #40]                   @ load result_in_float
    ldr    r9, [sp, #36]                   @ load the result pointer
    cmp    r4, #0
    ite    eq
    strdeq r0, [r9]                        @ store r0/r1 into result pointer
    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer

    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
END art_quick_invoke_stub_internal
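
// For reference, the register and stack arguments documented above map onto a C
// declaration along these lines (a sketch, not the authoritative declaration,
// which lives in the runtime's C++ sources):
//
//     extern "C" void art_quick_invoke_stub_internal(
//         ArtMethod* method,          // r0
//         uint32_t* args,             // r1
//         uint32_t args_size,         // r2
//         Thread* self,               // r3
//         JValue* result,             // [sp]
//         uint32_t result_in_float,   // [sp + 4]
//         uint32_t* core_reg_args,    // [sp + 8]
//         uint32_t* fp_reg_args);     // [sp + 12]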

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
    mov    r11, sp                         @ Save the stack pointer
    mov    r10, r1                         @ Save size of stack
    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
    REFRESH_MARKING_REGISTER
    mov    r6, r2                          @ Save the pc to call
    sub    r7, sp, #12                     @ Reserve space for stack pointer,
                                           @    JValue* result, and ArtMethod* slot.
    and    r7, #0xFFFFFFF0                 @ Align stack pointer
    mov    sp, r7                          @ Update stack pointer
    str    r11, [sp, #4]                   @ Save old stack pointer
    str    r3, [sp, #8]                    @ Save JValue* result
    mov    ip, #0
    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
    sub    sp, sp, r1                      @ Reserve space for callee stack
    mov    r2, r1
    mov    r1, r0
    mov    r0, sp
    bl     memcpy                          @ memcpy (dest r0, src r1, bytes r2)
    bl     .Losr_entry                     @ Call the method
    ldr    r10, [sp, #8]                   @ Restore JValue* result
    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
    ldr    r4, [sp, #36]                   @ load shorty
    ldrb   r4, [r4, #0]                    @ load return type
    cmp    r4, #68                         @ Test if result type char == 'D'.
    beq    .Losr_fp_result
    cmp    r4, #70                         @ Test if result type char == 'F'.
    beq    .Losr_fp_result
    strd r0, [r10]                         @ Store r0/r1 into result pointer
    b    .Losr_exit
.Losr_fp_result:
    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
.Losr_exit:
    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    sub r10, r10, #4
    str lr, [sp, r10]                     @ Store link register per the compiler ABI
    bx r6
END art_quick_osr_stub

    /*
     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_.
     */
ARM_ENTRY art_quick_do_long_jump
    vldm r1, {s0-s31}     @ load all fprs from argument fprs_
    ldr  r2, [r0, #60]    @ r2 = r15 (PC from gprs_ 60=4*15)
    ldr  r14, [r0, #56]   @ (LR from gprs_ 56=4*14)
    add  r0, r0, #12      @ increment r0 to skip gprs_[0..2] 12=4*3
    ldm  r0, {r3-r13}     @ load remaining gprs from argument gprs_
    REFRESH_MARKING_REGISTER
    ldr  r0, [r0, #-12]   @ load r0 value
    mov  r1, #0           @ clear result register r1
    bx   r2               @ do long jump
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
     * possibly null object to lock.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    cbz    r0, .Lslow_lock
.Lretry_lock:
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    mov    r3, r1
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    cbnz   r3, .Lnot_unlocked         @ already thin locked
    @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
    orr    r2, r1, r2                 @ r2 holds thread id with count of 0 with preserved read barrier bits
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    cbnz   r3, .Llock_strex_fail      @ store failed, retry
    dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
    bx lr
.Lnot_unlocked:  @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits
    lsr    r3, r1, LOCK_WORD_STATE_SHIFT
    cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, go slow path
    eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r2, r2                     @ zero top 16 bits
    cbnz   r2, .Lslow_lock            @ if thread ids don't match, contention: go to slow path;
                                      @ else fall through to the recursive thin lock case
    mov    r3, r1                     @ copy the lock word to check count overflow.
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits.
    add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
    lsr    r3, r2, #LOCK_WORD_GC_STATE_SHIFT    @ if the first gc state bit is set, we overflowed.
    cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
    add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
    cbnz   r3, .Llock_strex_fail      @ strex failed, retry
    bx lr
.Llock_strex_fail:
    b      .Lretry_lock               @ retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object
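
// For reference, the thin lock word layout that the fast path above relies on
// is roughly the following; the authoritative definition is in
// runtime/lock_word.h:
//
//     bits 31-30: state (00 = thin lock / unlocked)
//     bits 29-28: the "gc bits" (mark bit and read barrier state)
//     bits 27-16: thin lock count (0 for the first acquisition)
//     bits 15-0:  owner thread id (0 when unlocked)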

ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    r0, .Lslow_unlock
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ Need to use atomic instructions for read barrier
#endif
    lsr    r2, r1, #LOCK_WORD_STATE_SHIFT
    cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    mov    r3, r1                     @ copy lock word to check thread id equality
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r3, r3                     @ zero top 16 bits
    cbnz   r3, .Lslow_unlock          @ if thread ids don't match, go to slow path
    mov    r3, r1                     @ copy lock word to detect transition to unlocked
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    @ transition to unlocked
    mov    r3, r1
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  @ r3: zero except for the preserved gc bits
    dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lrecursive_thin_unlock:  @ r1: original lock word
    sub    r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ decrement count
#ifndef USE_READ_BARRIER
    str    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lunlock_strex_fail:
    b      .Lretry_unlock             @ retry
.Lslow_unlock:
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object

ENTRY art_quick_unlock_object_no_inline
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    push {r0-r1, lr}                    @ save arguments, link register and pad
    .cfi_adjust_cfa_offset 12
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset lr, 8
    sub sp, #4
    .cfi_adjust_cfa_offset 4
    bl artInstanceOfFromCode
    cbz    r0, .Lthrow_class_cast_exception
    add sp, #4
    .cfi_adjust_cfa_offset -4
    pop {r0-r1, pc}
    .cfi_adjust_cfa_offset 4        @ Reset unwind info so following code unwinds.
.Lthrow_class_cast_exception:
    add sp, #4
    .cfi_adjust_cfa_offset -4
    pop {r0-r1, lr}
    .cfi_adjust_cfa_offset -12
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore lr
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
    bkpt
END art_quick_check_instance_of

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
        ldr \rReg, [sp, #\offset]   @ restore rReg
        .cfi_restore \rReg
    .endif
.endm

// Save rReg's value to [sp, #offset].
.macro PUSH_REG rReg, offset
    str \rReg, [sp, #\offset]       @ save rReg
    .cfi_rel_offset \rReg, \offset
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                      @ push padding
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
        mov r1, \rObj               @ pass rObj
    .endif
    mov r2, #\offset                @ pass offset
    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
        mov \rDest, r0              @ save return value in rDest
    .endif
    add sp, #8                      @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
    POP_REG_NE r1, 4, \rDest
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                        @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
#else
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
#ifdef USE_READ_BARRIER
    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
    tst r2, r2
    beq .Ldo_aput_null
#else
    cbz r2, .Ldo_aput_null
#endif  // USE_READ_BARRIER
    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    cmp r3, ip  @ value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Ldo_aput_null:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr
.Lcheck_assignability:
    push {r0-r2, lr}             @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    mov r1, ip
    mov r0, r3
    bl artIsAssignableFromCode
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Lthrow_array_store_exception:
    pop {r0-r2, lr}
    /* No need to repeat restore cfi directives, the ones above apply here. */
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
    mov r1, r2
    mov r2, r9                     @ pass Thread::Current
    bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
    bkpt                           @ unreached
END art_quick_aput_obj
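
// Note on the card marking sequence above (`strb r3, [r3, r0]`): r3 holds the
// card table base loaded from Thread::Current()->card_table_ and r0 the object
// address shifted right by CARD_TABLE_CARD_SHIFT. The byte stored is the low
// byte of the card table base itself; the table base is biased so that this
// value equals the "dirty card" marker, which saves loading a separate constant.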

// Macro to facilitate adding new allocation entrypoints.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
    mov    r1, r9                     @ pass Thread::Current
    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
    mov    r2, r9                     @ pass Thread::Current
    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new array allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
    mov    r3, r9                     @ pass Thread::Current
    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
    str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     \entrypoint
    add    sp, #16                    @ strip the extra frame
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm
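
// Note: in FOUR_ARG_DOWNCALL all four AAPCS register argument slots (r0-r3) are
// already occupied, so Thread::Current() becomes the fifth argument and must be
// passed on the stack: `str r9, [sp, #-16]!` stores it at [sp], and reserving
// 16 bytes rather than 4 keeps the 16-byte stack alignment used by these stubs.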

// Macro for string and type resolution and initialization.
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME r1    @ save everything in case of GC
    mov    r1, r9                     @ pass Thread::Current
    bl     \entrypoint                @ (uint32_t index, Thread*)
    cbz    r0, 1f                     @ If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    REFRESH_MARKING_REGISTER
    bx     lr
    .cfi_restore_state
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm
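
// Note: the .cfi_remember_state/.cfi_restore_state pair above lets the success
// path tear down the frame without invalidating the unwind information still
// needed on the exception path at label 1, where the full frame is in place.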

ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined by macros in runtime/entrypoints/quick/quick_field_entrypoints.cc.

    /*
     * Called by managed code to resolve a static field and load a non-wide value.
     */
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve a static field and load a 64-bit primitive value.
     */
    .extern artGet64StaticFromCompiledCode
ENTRY art_quick_get64_static
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r1, r9                        @ pass Thread::Current
    bl     artGet64StaticFromCompiledCode        @ (uint32_t field_idx, Thread*)
    ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz   r2, 1f                        @ deliver exception if one is pending
    bx     lr                            @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_static

    /*
     * Called by managed code to resolve an instance field and load a non-wide value.
     */
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve an instance field and load a 64-bit primitive value.
     */
    .extern artGet64InstanceFromCompiledCode
ENTRY art_quick_get64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r2, r9                        @ pass Thread::Current
    bl     artGet64InstanceFromCompiledCode      @ (field_idx, Object*, Thread*)
    ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz   r2, 1f                        @ deliver exception if one is pending
    bx     lr                            @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_instance

    /*
     * Called by managed code to resolve a static field and store a value.
     */
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a non-wide value.
     */
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a wide value.
     */
    .extern artSet64InstanceFromCompiledCode
ENTRY art_quick_set64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
                                         @ r2:r3 contain the wide argument
    str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64InstanceFromCompiledCode      @ (field_idx, Object*, new_val, Thread*)
    add    sp, #16                       @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_instance

    .extern artSet64StaticFromCompiledCode
ENTRY art_quick_set64_static
    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
    str    r9, [sp, #-16]!                @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*)
    add    sp, #16                        @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME          @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_static

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have ARM-specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

    1160 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
   1161 //
   1162 // If isInitialized=1 then the compiler assumes the object's class has already been initialized.
   1163 // If isInitialized=0 the compiler can only assume it's been at least resolved.
   1164 .macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
   1165 ENTRY \c_name
   1166     // Fast path rosalloc allocation.
   1167     // r0: type/return value, r9: Thread::Current
   1168     // r1, r2, r3, r12: free.
   1169     ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]     // Check if the thread local
   1170                                                               // allocation stack has room.
   1171                                                               // TODO: consider using ldrd.
   1172     ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
   1173     cmp    r3, r12
   1174     bhs    .Lslow_path\c_name
   1175 
   1176     ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
   1177     cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
   1178                                                               // local allocation. Also does the
   1179                                                               // initialized and finalizable checks.
   1180     // When isInitialized == 0, then the class is potentially not yet initialized.
   1181     // If the class is not yet initialized, the object size will be very large to force the branch
   1182     // below to be taken.
   1183     //
   1184     // See InitializeClassVisitors in class-inl.h for more details.
   1185     bhs    .Lslow_path\c_name
   1186                                                               // Compute the rosalloc bracket index
   1187                                                               // from the size. Since the size is
   1188                                                               // already aligned we can combine the
   1189                                                               // two shifts together.
   1190     add    r12, r9, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
    1191                                                               // Subtract pointer size since there
   1192                                                               // are no runs for 0 byte allocations
   1193                                                               // and the size is already aligned.
   1194                                                               // Load the rosalloc run (r12)
   1195     ldr    r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
   1196                                                               // Load the free list head (r3). This
   1197                                                               // will be the return val.
   1198     ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1199     cbz    r3, .Lslow_path\c_name
   1200     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
   1201     ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
   1202                                                               // and update the list head with the
   1203                                                               // next pointer.
   1204     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1205                                                               // Store the class pointer in the
   1206                                                               // header. This also overwrites the
   1207                                                               // next pointer. The offsets are
   1208                                                               // asserted to match.
   1209 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
   1210 #error "Class pointer needs to overwrite next pointer."
   1211 #endif
   1212     POISON_HEAP_REF r0
   1213     str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
   1214                                                               // Push the new object onto the thread
   1215                                                               // local allocation stack and
   1216                                                               // increment the thread local
   1217                                                               // allocation stack top.
   1218     ldr    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1219     str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
   1220     str    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1221                                                               // Decrement the size of the free list
   1222 
   1223     // After this "STR" the object is published to the thread local allocation stack,
    1224     // and it will be observable from a runtime internal (e.g. Heap::VisitObjects) point of view.
   1225     // It is not yet visible to the running (user) compiled code until after the return.
   1226     //
   1227     // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
   1228     // the state of the allocation stack slot. It can be a pointer to one of:
   1229     // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    1230     //       (The stack's initial state is all null pointers.)
   1231     // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
   1232     // 2) A fully valid object, with a valid class pointer pointing to a real class.
   1233     // Other states are not allowed.
   1234     //
    1235     // Such an object is invalid only temporarily; it will eventually become valid.
    1236     // The internal runtime code simply checks whether the object is null or partially
    1237     // initialized and, if so, ignores it.
   1238     //
    1239     // (Note: The actual check is done by testing whether a non-null object has a class pointer
    1240     // pointing to ClassClass, and whether the ClassClass's class pointer is self-cyclic. A
    1241     // rosalloc free slot "next" pointer is not cyclic.)
   1242     //
   1243     // See also b/28790624 for a listing of CLs dealing with this race.
   1244     ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1245     sub    r1, #1
   1246                                                               // TODO: consider combining this store
   1247                                                               // and the list head store above using
   1248                                                               // strd.
   1249     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1250 
   1251     mov    r0, r3                                             // Set the return value and return.
   1252 .if \isInitialized == 0
   1253     // This barrier is only necessary when the allocation also requires
   1254     // a class initialization check.
   1255     //
   1256     // If the class is already observably initialized, then new-instance allocations are protected
   1257     // from publishing by the compiler which inserts its own StoreStore barrier.
   1258     dmb    ish
   1259     // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
   1260     // they should happen-after the implicit initialization check.
   1261     //
   1262     // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
   1263     // a new observably-initialized class state.
   1264 .endif
   1265     bx     lr
   1266 
   1267 .Lslow_path\c_name:
   1268     SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
   1269     mov    r1, r9                     @ pass Thread::Current
   1270     bl     \cxx_name                  @ (mirror::Class* cls, Thread*)
   1271     RESTORE_SAVE_REFS_ONLY_FRAME
   1272     REFRESH_MARKING_REGISTER
   1273     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1274 END \c_name
   1275 .endm
   1276 
   1277 ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
   1278 ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
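
         // For orientation, the rosalloc fast path generated above corresponds roughly to the
         // following C++ sketch. This is a simplified illustration under assumed names (`Run`,
         // `Slot`, `BracketIndex()`, and the Thread fields stand in for the raw offsets used by
         // the assembly), not the runtime's actual implementation.
         //
         //   mirror::Object* AllocRosAllocFastPath(mirror::Class* cls, Thread* self) {
         //     if (self->alloc_stack_top >= self->alloc_stack_end) return SlowPath();
         //     size_t size = cls->object_size_alloc_fast_path;  // "very large" if not initialized
         //     if (size >= kMaxThreadLocalBracketSize) return SlowPath();
         //     Run* run = self->rosalloc_runs[BracketIndex(size)];
         //     Slot* slot = run->free_list.head;
         //     if (slot == nullptr) return SlowPath();
         //     run->free_list.head = slot->next;                // pop the free list head
         //     slot->klass = cls;                               // overwrites the `next` pointer
         //     *self->alloc_stack_top++ = slot;                 // publish to the allocation stack
         //     run->free_list.size--;
         //     return reinterpret_cast<mirror::Object*>(slot);
         //   }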
   1279 
   1280 // The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
   1281 // and art_quick_alloc_object_resolved/initialized_region_tlab.
   1282 //
    1283 // r0: type, r9: Thread::Current; r1, r2, r3, r12: free.
   1284 // Need to preserve r0 to the slow path.
   1285 //
   1286 // If isInitialized=1 then the compiler assumes the object's class has already been initialized.
   1287 // If isInitialized=0 the compiler can only assume it's been at least resolved.
   1288 .macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
   1289                                                              // Load thread_local_pos (r12) and
   1290                                                              // thread_local_end (r3) with ldrd.
   1291                                                              // Check constraints for ldrd.
   1292 #if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
    1293 #error "Thread::thread_local_pos/end must be consecutive and 8 byte aligned for performance"
   1294 #endif
   1295     ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
   1296     sub    r12, r3, r12                                       // Compute the remaining buf size.
   1297     ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
   1298     cmp    r3, r12                                            // Check if it fits.
   1299     // When isInitialized == 0, then the class is potentially not yet initialized.
   1300     // If the class is not yet initialized, the object size will be very large to force the branch
   1301     // below to be taken.
   1302     //
   1303     // See InitializeClassVisitors in class-inl.h for more details.
   1304     bhi    \slowPathLabel
   1305     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    1306                                                               // Reload old thread_local_pos (r2)
   1307                                                               // for the return value.
   1308     ldr    r2, [r9, #THREAD_LOCAL_POS_OFFSET]
   1309     add    r1, r2, r3
   1310     str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
   1311     // After this "STR" the object is published to the thread local allocation stack,
    1312     // and it will be observable from a runtime internal (e.g. Heap::VisitObjects) point of view.
   1313     // It is not yet visible to the running (user) compiled code until after the return.
   1314     //
   1315     // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
   1316     // the state of the object. It can be either:
   1317     // 1) A partially valid object, with a null class pointer
   1318     //       (because the initial state of TLAB buffers is all 0s/nulls).
   1319     // 2) A fully valid object, with a valid class pointer pointing to a real class.
   1320     // Other states are not allowed.
   1321     //
    1322     // Such an object is invalid only temporarily; it will eventually become valid.
    1323     // The internal runtime code simply checks whether the object is null or partially
    1324     // initialized and, if so, ignores it.
   1325     //
   1326     // (Note: The actual check is done by checking that the object's class pointer is non-null.
   1327     // Also, unlike rosalloc, the object can never be observed as null).
   1328     ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
   1329     add    r1, r1, #1
   1330     str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
   1331     POISON_HEAP_REF r0
   1332     str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
   1333                                                               // Fence. This is "ish" not "ishst" so
   1334                                                               // that the code after this allocation
   1335                                                               // site will see the right values in
   1336                                                               // the fields of the class.
   1337     mov    r0, r2
   1338 .if \isInitialized == 0
   1339     // This barrier is only necessary when the allocation also requires
   1340     // a class initialization check.
   1341     //
   1342     // If the class is already observably initialized, then new-instance allocations are protected
   1343     // from publishing by the compiler which inserts its own StoreStore barrier.
   1344     dmb    ish
   1345     // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
   1346     // they should happen-after the implicit initialization check.
   1347     //
   1348     // TODO: Remove dmb for class initialization checks (b/36692143)
   1349 .endif
   1350     bx     lr
   1351 .endm
   1352 
   1353 // The common code for art_quick_alloc_object_*region_tlab
   1354 .macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
   1355 ENTRY \name
   1356     // Fast path tlab allocation.
   1357     // r0: type, r9: Thread::Current
   1358     // r1, r2, r3, r12: free.
   1359     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
   1360 .Lslow_path\name:
   1361     SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
   1362     mov    r1, r9                                             // Pass Thread::Current.
   1363     bl     \entrypoint                                        // (mirror::Class* klass, Thread*)
   1364     RESTORE_SAVE_REFS_ONLY_FRAME
   1365     REFRESH_MARKING_REGISTER
   1366     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1367 END \name
   1368 .endm
   1369 
   1370 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
   1371 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
   1372 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
   1373 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
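
         // The TLAB fast path above amounts to a bump-pointer allocation. A hedged C++ sketch
         // (field names are illustrative stand-ins for the THREAD_LOCAL_* offsets):
         //
         //   mirror::Object* AllocTlabFastPath(mirror::Class* cls, Thread* self) {
         //     size_t size = cls->object_size_alloc_fast_path;  // "very large" if not initialized
         //     if (size > size_t(self->thread_local_end - self->thread_local_pos)) return SlowPath();
         //     uint8_t* obj = self->thread_local_pos;           // the old pos is the result
         //     self->thread_local_pos = obj + size;             // bump the pointer
         //     self->thread_local_objects++;
         //     reinterpret_cast<mirror::Object*>(obj)->klass = cls;  // publish: class pointer last
         //     return reinterpret_cast<mirror::Object*>(obj);
         //   }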
   1374 
   1375 
   1376 // The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
   1377 // and art_quick_alloc_array_resolved/initialized_region_tlab.
   1378 //
    1379 // r0: type, r1: component_count, r2: total_size, r9: Thread::Current; r3, r12: free.
   1380 // Need to preserve r0 and r1 to the slow path.
   1381 .macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
   1382     and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask
   1383                                                               // (addr + 7) & ~7.
   1384 
   1385                                                               // Load thread_local_pos (r3) and
   1386                                                               // thread_local_end (r12) with ldrd.
   1387                                                               // Check constraints for ldrd.
   1388 #if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
    1389 #error "Thread::thread_local_pos/end must be consecutive and 8 byte aligned for performance"
   1390 #endif
   1391     ldrd   r3, r12, [r9, #THREAD_LOCAL_POS_OFFSET]
   1392     sub    r12, r12, r3                                       // Compute the remaining buf size.
   1393     cmp    r2, r12                                            // Check if the total_size fits.
    1394     // The array class is always initialized here, so unlike new-instance
    1395     // this comparison does not double as an initialization check.
   1396     bhi    \slowPathLabel
   1397     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
   1398     add    r2, r2, r3
   1399     str    r2, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
   1400     ldr    r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
   1401     add    r2, r2, #1
   1402     str    r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
   1403     POISON_HEAP_REF r0
   1404     str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
   1405     str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
   1406                                                               // Fence. This is "ish" not "ishst" so
   1407                                                               // that the code after this allocation
   1408                                                               // site will see the right values in
   1409                                                               // the fields of the class.
   1410     mov    r0, r3
   1411 // new-array is special. The class is loaded and immediately goes to the Initialized state
   1412 // before it is published. Therefore the only fence needed is for the publication of the object.
   1413 // See ClassLinker::CreateArrayClass() for more details.
   1414 
   1415 // For publication of the new array, we don't need a 'dmb ishst' here.
    1416 // The compiler generates 'dmb ishst' for all new-array instructions.
   1417     bx     lr
   1418 .endm
   1419 
   1420 .macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
   1421 ENTRY \name
   1422     // Fast path array allocation for region tlab allocation.
   1423     // r0: mirror::Class* type
   1424     // r1: int32_t component_count
   1425     // r9: thread
   1426     // r2, r3, r12: free.
   1427     \size_setup .Lslow_path\name
   1428     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
   1429 .Lslow_path\name:
   1430     // r0: mirror::Class* klass
   1431     // r1: int32_t component_count
   1432     // r2: Thread* self
   1433     SETUP_SAVE_REFS_ONLY_FRAME r2  // save callee saves in case of GC
   1434     mov    r2, r9                  // pass Thread::Current
   1435     bl     \entrypoint
   1436     RESTORE_SAVE_REFS_ONLY_FRAME
   1437     REFRESH_MARKING_REGISTER
   1438     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1439 END \name
   1440 .endm
   1441 
   1442 .macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
   1443     bkpt                                                    // We should never enter here.
   1444                                                             // Code below is for reference.
   1445                                                             // Possibly a large object, go slow.
   1446                                                             // Also does negative array size check.
   1447     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
   1448     cmp r1, r2
   1449     bhi \slow_path
   1450                                                             // Array classes are never finalizable
   1451                                                             // or uninitialized, no need to check.
   1452     ldr    r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Load component type
   1453     UNPOISON_HEAP_REF r3
   1454     ldr    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
   1455     lsr    r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT         // Component size shift is in high 16
   1456                                                             // bits.
   1457     lsl    r2, r1, r3                                       // Calculate data size
   1458                                                             // Add array data offset and alignment.
   1459     add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1460 #if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
   1461 #error Long array data offset must be 4 greater than int array data offset.
   1462 #endif
   1463 
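                                                            // The add/and pair below computes
                                                            // ((shift + 1) & 4), which is 4 exactly
                                                            // when the component size shift is 3
                                                            // (64-bit elements) and 0 otherwise.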
    1464     add    r3, r3, #1                                       // Add 4 to the size only if the
   1465                                                             // component size shift is 3
   1466                                                             // (for 64 bit alignment).
   1467     and    r3, r3, #4
   1468     add    r2, r2, r3
   1469 .endm
   1470 
   1471 .macro COMPUTE_ARRAY_SIZE_8 slow_path
   1472     // Possibly a large object, go slow.
   1473     // Also does negative array size check.
   1474     movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
   1475     cmp r1, r2
   1476     bhi \slow_path
   1477     // Add array data offset and alignment.
   1478     add    r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1479 .endm
   1480 
   1481 .macro COMPUTE_ARRAY_SIZE_16 slow_path
   1482     // Possibly a large object, go slow.
   1483     // Also does negative array size check.
   1484     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
   1485     cmp r1, r2
   1486     bhi \slow_path
   1487     lsl    r2, r1, #1
   1488     // Add array data offset and alignment.
   1489     add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1490 .endm
   1491 
   1492 .macro COMPUTE_ARRAY_SIZE_32 slow_path
   1493     // Possibly a large object, go slow.
   1494     // Also does negative array size check.
   1495     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
   1496     cmp r1, r2
   1497     bhi \slow_path
   1498     lsl    r2, r1, #2
   1499     // Add array data offset and alignment.
   1500     add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1501 .endm
   1502 
   1503 .macro COMPUTE_ARRAY_SIZE_64 slow_path
   1504     // Possibly a large object, go slow.
   1505     // Also does negative array size check.
   1506     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
   1507     cmp r1, r2
   1508     bhi \slow_path
   1509     lsl    r2, r1, #3
   1510     // Add array data offset and alignment.
   1511     add    r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1512 .endm
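
         // Worked example for the size computation (assuming, as on 32-bit targets,
         // MIRROR_INT_ARRAY_DATA_OFFSET == 12 and OBJECT_ALIGNMENT_MASK == 7): for `new int[5]`,
         // COMPUTE_ARRAY_SIZE_32 produces r2 = (5 << 2) + 12 + 7 = 39, and the `and` with
         // OBJECT_ALIGNMENT_MASK_TOGGLED in ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE rounds
         // this down to 32, i.e. the 8-byte aligned size of a 12-byte header plus 20 bytes of data.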
   1513 
   1514 // TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
   1515 // the entrypoint once all backends have been updated to use the size variants.
   1516 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1517 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
   1518 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
   1519 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
   1520 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
   1521 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1522 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
   1523 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
   1524 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
   1525 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
   1526 
   1527     /*
   1528      * Called by managed code when the value in rSUSPEND has been decremented to 0.
   1529      */
   1530     .extern artTestSuspendFromCode
   1531 ENTRY art_quick_test_suspend
   1532 #ifdef ARM_R4_SUSPEND_FLAG
   1533     ldrh   rSUSPEND, [rSELF, #THREAD_FLAGS_OFFSET]
   1534     cbnz   rSUSPEND, 1f                         @ check Thread::Current()->suspend_count_ == 0
   1535     mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
   1536     bx     lr                                   @ return if suspend_count_ == 0
   1537 1:
   1538     mov    rSUSPEND, #SUSPEND_CHECK_INTERVAL    @ reset rSUSPEND to SUSPEND_CHECK_INTERVAL
   1539 #endif
   1540     SETUP_SAVE_EVERYTHING_FRAME r0              @ save everything for GC stack crawl
   1541     mov    r0, rSELF
   1542     bl     artTestSuspendFromCode               @ (Thread*)
   1543     RESTORE_SAVE_EVERYTHING_FRAME
   1544     REFRESH_MARKING_REGISTER
   1545     bx     lr
   1546 END art_quick_test_suspend
   1547 
   1548 ENTRY art_quick_implicit_suspend
   1549     mov    r0, rSELF
   1550     SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
   1551     bl     artTestSuspendFromCode             @ (Thread*)
   1552     RESTORE_SAVE_REFS_ONLY_FRAME
   1553     REFRESH_MARKING_REGISTER
   1554     bx     lr
   1555 END art_quick_implicit_suspend
   1556 
   1557     /*
   1558      * Called by managed code that is attempting to call a method on a proxy class. On entry
   1559      * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
   1560      * frame size of the invoked proxy method agrees with a ref and args callee save frame.
   1561      */
   1562      .extern artQuickProxyInvokeHandler
   1563 ENTRY art_quick_proxy_invoke_handler
   1564     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
   1565     mov     r2, r9                 @ pass Thread::Current
   1566     mov     r3, sp                 @ pass SP
   1567     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
   1568     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1569     // Tear down the callee-save frame. Skip arg registers.
   1570     add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1571     .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1572     RESTORE_SAVE_REFS_ONLY_FRAME
   1573     REFRESH_MARKING_REGISTER
    1574     cbnz    r2, 1f                 @ deliver exception if one is pending
   1575     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
   1576     bx      lr                     @ return on success
   1577 1:
   1578     DELIVER_PENDING_EXCEPTION
   1579 END art_quick_proxy_invoke_handler
   1580 
   1581     /*
   1582      * Called to resolve an imt conflict.
   1583      * r0 is the conflict ArtMethod.
   1584      * r12 is a hidden argument that holds the target interface method's dex method index.
   1585      *
   1586      * Note that this stub writes to r0, r4, and r12.
   1587      */
   1588     .extern artLookupResolvedMethod
   1589 ENTRY art_quick_imt_conflict_trampoline
   1590     push    {r1-r2}
   1591     .cfi_adjust_cfa_offset (2 * 4)
   1592     .cfi_rel_offset r1, 0
   1593     .cfi_rel_offset r2, 4
   1594     ldr     r4, [sp, #(2 * 4)]  // Load referrer.
   1595     ubfx    r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
   1596     ldr     r4, [r4, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_32]   // Load dex cache methods array
   1597     add     r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.
   1598     ldr     r2, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
   1599 
   1600 // FIXME: Configure the build to use the faster code when appropriate.
   1601 //        Currently we fall back to the slower version.
   1602 #if HAS_ATOMIC_LDRD
   1603     ldrd    r0, r1, [r4]
   1604 #else
   1605     push    {r3}
   1606     .cfi_adjust_cfa_offset 4
   1607     .cfi_rel_offset r3, 0
   1608 .Limt_conflict_trampoline_retry_load:
   1609     ldrexd  r0, r1, [r4]
   1610     strexd  r3, r0, r1, [r4]
   1611     cmp     r3, #0
   1612     bne     .Limt_conflict_trampoline_retry_load
   1613     pop     {r3}
   1614     .cfi_adjust_cfa_offset -4
   1615     .cfi_restore r3
   1616 #endif
   1617 
   1618     ldr     r4, [r2]  // Load first entry in ImtConflictTable.
   1619     cmp     r1, r12   // Compare method index to see if we had a DexCache method hit.
   1620     bne     .Limt_conflict_trampoline_dex_cache_miss
   1621 .Limt_table_iterate:
   1622     cmp     r4, r0
   1623     // Branch if found. Benchmarks have shown doing a branch here is better.
   1624     beq     .Limt_table_found
   1625     // If the entry is null, the interface method is not in the ImtConflictTable.
   1626     cbz     r4, .Lconflict_trampoline
   1627     // Iterate over the entries of the ImtConflictTable.
   1628     ldr     r4, [r2, #(2 * __SIZEOF_POINTER__)]!
   1629     b .Limt_table_iterate
   1630 .Limt_table_found:
   1631     // We successfully hit an entry in the table. Load the target method
   1632     // and jump to it.
   1633     ldr     r0, [r2, #__SIZEOF_POINTER__]
   1634     .cfi_remember_state
   1635     pop     {r1-r2}
   1636     .cfi_adjust_cfa_offset -(2 * 4)
   1637     .cfi_restore r1
   1638     .cfi_restore r2
   1639     ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
   1640     .cfi_restore_state
   1641 .Lconflict_trampoline:
   1642     // Call the runtime stub to populate the ImtConflictTable and jump to the
   1643     // resolved method.
   1644     .cfi_remember_state
   1645     pop     {r1-r2}
   1646     .cfi_adjust_cfa_offset -(2 * 4)
   1647     .cfi_restore r1
   1648     .cfi_restore r2
   1649     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
   1650     .cfi_restore_state
   1651 .Limt_conflict_trampoline_dex_cache_miss:
    1652     // We're not creating a proper runtime method frame here, so
    1653     // artLookupResolvedMethod() is not allowed to walk the stack.
   1654 
   1655     // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
   1656     push    {r2-r4, lr}
   1657     .cfi_adjust_cfa_offset (4 * 4)
   1658     .cfi_rel_offset r3, 4
   1659     .cfi_rel_offset lr, 12
   1660     // Save FPR args.
   1661     vpush   {d0-d7}
   1662     .cfi_adjust_cfa_offset (8 * 8)
   1663 
   1664     mov     r0, ip                      // Pass method index.
   1665     ldr     r1, [sp, #(8 * 8 + 6 * 4)]  // Pass referrer.
   1666     bl      artLookupResolvedMethod     // (uint32_t method_index, ArtMethod* referrer)
   1667 
   1668     // Restore FPR args.
   1669     vpop    {d0-d7}
   1670     .cfi_adjust_cfa_offset -(8 * 8)
   1671     // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
   1672     pop     {r2-r4, lr}
   1673     .cfi_adjust_cfa_offset -(4 * 4)
   1674     .cfi_restore r3
   1675     .cfi_restore lr
   1676 
   1677     cmp     r0, #0                  // If the method wasn't resolved,
   1678     beq     .Lconflict_trampoline   //   skip the lookup and go to artInvokeInterfaceTrampoline().
   1679     b       .Limt_table_iterate
   1680 END art_quick_imt_conflict_trampoline
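
         // A hedged C++ sketch of the lookup above. The ImtConflictTable is assumed to be a
         // null-terminated array of (interface_method, implementation) pointer pairs; the names
         // here are illustrative.
         //
         //   const void* ImtConflictLookup(ArtMethod* interface_method, void** table) {
         //     for (size_t i = 0; table[2 * i] != nullptr; ++i) {
         //       if (table[2 * i] == interface_method) {
         //         return GetQuickCode(table[2 * i + 1]);  // branch to the implementation
         //       }
         //     }
         //     return ConflictTrampoline();  // populate the table, then invoke the method
         //   }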
   1681 
   1682     .extern artQuickResolutionTrampoline
   1683 ENTRY art_quick_resolution_trampoline
   1684     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
   1685     mov     r2, r9                 @ pass Thread::Current
   1686     mov     r3, sp                 @ pass SP
   1687     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
   1688     cbz     r0, 1f                 @ is code pointer null? goto exception
   1689     mov     r12, r0
   1690     ldr     r0, [sp, #0]           @ load resolved method in r0
   1691     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1692     REFRESH_MARKING_REGISTER
   1693     bx      r12                    @ tail-call into actual code
   1694 1:
   1695     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1696     DELIVER_PENDING_EXCEPTION
   1697 END art_quick_resolution_trampoline
   1698 
   1699     /*
   1700      * Called to do a generic JNI down-call
   1701      */
   1702 ENTRY art_quick_generic_jni_trampoline
   1703     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
   1704 
   1705     // Save rSELF
   1706     mov r11, rSELF
    1707     // Save SP, so we can have static CFI info. r10 is callee-saved by the kSaveRefsAndArgs frame.
   1708     mov r10, sp
   1709     .cfi_def_cfa_register r10
   1710 
    1711     sub sp, sp, #5120                 // Reserve scratch space for the native call frame.
   1712 
   1713     // prepare for artQuickGenericJniTrampoline call
   1714     // (Thread*,  SP)
   1715     //    r0      r1   <= C calling convention
   1716     //  rSELF     r10  <= where they are
   1717 
   1718     mov r0, rSELF   // Thread*
   1719     mov r1, r10
   1720     blx artQuickGenericJniTrampoline  // (Thread*, sp)
   1721 
   1722     // The C call will have registered the complete save-frame on success.
   1723     // The result of the call is:
   1724     // r0: pointer to native code, 0 on error.
   1725     // r1: pointer to the bottom of the used area of the alloca, can restore stack till there.
   1726 
   1727     // Check for error = 0.
   1728     cbz r0, .Lexception_in_native
   1729 
   1730     // Release part of the alloca.
   1731     mov sp, r1
   1732 
   1733     // Save the code pointer
   1734     mov r12, r0
   1735 
   1736     // Load parameters from frame into registers.
   1737     pop {r0-r3}
   1738 
   1739     // Softfloat.
   1740     // TODO: Change to hardfloat when supported.
   1741 
   1742     blx r12           // native call.
   1743 
   1744     // result sign extension is handled in C code
   1745     // prepare for artQuickGenericJniEndTrampoline call
   1746     // (Thread*, result, result_f)
   1747     //    r0      r2,r3    stack       <= C calling convention
   1748     //    r11     r0,r1    r0,r1          <= where they are
   1749     sub sp, sp, #8 // Stack alignment.
   1750 
   1751     push {r0-r1}
   1752     mov r3, r1
   1753     mov r2, r0
   1754     mov r0, r11
   1755 
   1756     blx artQuickGenericJniEndTrampoline
   1757 
   1758     // Restore self pointer.
   1759     mov r9, r11
   1760 
   1761     // Pending exceptions possible.
   1762     ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1763     cbnz r2, .Lexception_in_native
   1764 
   1765     // Tear down the alloca.
   1766     mov sp, r10
   1767     .cfi_def_cfa_register sp
   1768 
   1769     // Tear down the callee-save frame. Skip arg registers.
   1770     add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
   1771     .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
   1772     RESTORE_SAVE_REFS_ONLY_FRAME
   1773     REFRESH_MARKING_REGISTER
   1774 
   1775     // store into fpr, for when it's a fpr return...
   1776     vmov d0, r0, r1
   1777     bx lr      // ret
   1778     // Undo the unwinding information from above since it doesn't apply below.
   1779     .cfi_def_cfa_register r10
   1780     .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
   1781 
   1782 .Lexception_in_native:
   1783     ldr sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
   1784     .cfi_def_cfa_register sp
    1785     @ This will create a new save-all frame, required by the runtime.
   1786     DELIVER_PENDING_EXCEPTION
   1787 END art_quick_generic_jni_trampoline
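
         // In outline, the trampoline above does the following (a hedged summary of the code, not
         // authoritative documentation):
         //   1. Set up a kSaveRefsAndArgs frame and reserve 5120 bytes of scratch stack.
         //   2. Call artQuickGenericJniTrampoline(Thread*, SP), which lays out the handle scope and
         //      the native call frame in the scratch area and returns the native code pointer (r0)
         //      and the usable bottom of the stack (r1).
         //   3. Pop {r0-r3} as the first native arguments and call the native code.
         //   4. Call artQuickGenericJniEndTrampoline(Thread*, result, result_f) to decode the
         //      result and clean up the handle scope.
         //   5. Check for a pending exception, tear down the frames, and return.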
   1788 
   1789     .extern artQuickToInterpreterBridge
   1790 ENTRY art_quick_to_interpreter_bridge
   1791     SETUP_SAVE_REFS_AND_ARGS_FRAME r1
   1792     mov     r1, r9                 @ pass Thread::Current
   1793     mov     r2, sp                 @ pass SP
   1794     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
   1795     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1796     // Tear down the callee-save frame. Skip arg registers.
   1797     add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1798     .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1799     RESTORE_SAVE_REFS_ONLY_FRAME
   1800     REFRESH_MARKING_REGISTER
    1801     cbnz    r2, 1f                 @ deliver exception if one is pending
   1802     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
   1803     bx      lr                     @ return on success
   1804 1:
   1805     DELIVER_PENDING_EXCEPTION
   1806 END art_quick_to_interpreter_bridge
   1807 
   1808 /*
   1809  * Called to attempt to execute an obsolete method.
   1810  */
   1811 ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
   1812 
   1813     /*
   1814      * Routine that intercepts method calls and returns.
   1815      */
   1816     .extern artInstrumentationMethodEntryFromCode
   1817     .extern artInstrumentationMethodExitFromCode
   1818 ENTRY art_quick_instrumentation_entry
   1819     @ Make stack crawlable and clobber r2 and r3 (post saving)
   1820     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
   1821     @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
   1822     str   r0, [sp, #4]
   1823     mov   r2, r9         @ pass Thread::Current
   1824     mov   r3, sp         @ pass SP
   1825     blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
   1826     cbz   r0, .Ldeliver_instrumentation_entry_exception
   1827                          @ Deliver exception if we got nullptr as function.
   1828     mov   r12, r0        @ r12 holds reference to code
   1829     ldr   r0, [sp, #4]   @ restore r0
   1830     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1831     REFRESH_MARKING_REGISTER
   1832     blx   r12            @ call method with lr set to art_quick_instrumentation_exit
   1833 @ Deliberate fall-through into art_quick_instrumentation_exit.
   1834     .type art_quick_instrumentation_exit, #function
   1835     .global art_quick_instrumentation_exit
   1836 art_quick_instrumentation_exit:
    1837     mov   lr, #0         @ the link register points here; clobber it with 0 for later checks
   1838     SETUP_SAVE_REFS_ONLY_FRAME r2  @ set up frame knowing r2 and r3 must be dead on exit
   1839     mov   r12, sp        @ remember bottom of caller's frame
   1840     push  {r0-r1}        @ save return value
   1841     .cfi_adjust_cfa_offset 8
   1842     .cfi_rel_offset r0, 0
   1843     .cfi_rel_offset r1, 4
   1844     mov   r2, sp         @ store gpr_res pointer.
   1845     vpush {d0}           @ save fp return value
   1846     .cfi_adjust_cfa_offset 8
   1847     mov   r3, sp         @ store fpr_res pointer
   1848     mov   r1, r12        @ pass SP
   1849     mov   r0, r9         @ pass Thread::Current
   1850     blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
   1851 
   1852     mov   r2, r0         @ link register saved by instrumentation
   1853     mov   lr, r1         @ r1 is holding link register if we're to bounce to deoptimize
   1854     vpop  {d0}           @ restore fp return value
   1855     .cfi_adjust_cfa_offset -8
   1856     pop   {r0, r1}       @ restore return value
   1857     .cfi_adjust_cfa_offset -8
   1858     .cfi_restore r0
   1859     .cfi_restore r1
   1860     RESTORE_SAVE_REFS_ONLY_FRAME
   1861     REFRESH_MARKING_REGISTER
   1862     cbz   r2, .Ldo_deliver_instrumentation_exception
   1863                          @ Deliver exception if we got nullptr as function.
   1864     bx    r2             @ Otherwise, return
   1865 .Ldeliver_instrumentation_entry_exception:
   1866     @ Deliver exception for art_quick_instrumentation_entry placed after
   1867     @ art_quick_instrumentation_exit so that the fallthrough works.
   1868     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1869 .Ldo_deliver_instrumentation_exception:
   1870     DELIVER_PENDING_EXCEPTION
   1871 END art_quick_instrumentation_entry
   1872 
   1873     /*
   1874      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
   1875      * will long jump to the upcall with a special exception of -1.
   1876      */
   1877     .extern artDeoptimize
   1878 ENTRY art_quick_deoptimize
   1879     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0
   1880     mov    r0, r9         @ pass Thread::Current
   1881     blx    artDeoptimize  @ (Thread*)
   1882 END art_quick_deoptimize
   1883 
   1884     /*
   1885      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
   1886      * will long jump to the interpreter bridge.
   1887      */
   1888     .extern artDeoptimizeFromCompiledCode
   1889 ENTRY art_quick_deoptimize_from_compiled_code
   1890     SETUP_SAVE_EVERYTHING_FRAME r1
   1891     mov    r1, r9                         @ pass Thread::Current
   1892     blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
   1893 END art_quick_deoptimize_from_compiled_code
   1894 
   1895     /*
   1896      * Signed 64-bit integer multiply.
   1897      *
   1898      * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
   1899      *        WX
   1900      *      x YZ
   1901      *  --------
   1902      *     ZW ZX
   1903      *  YW YX
   1904      *
   1905      * The low word of the result holds ZX, the high word holds
   1906      * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
   1907      * it doesn't fit in the low 64 bits.
   1908      *
   1909      * Unlike most ARM math operations, multiply instructions have
   1910      * restrictions on using the same register more than once (Rd and Rm
   1911      * cannot be the same).
   1912      */
   1913     /* mul-long vAA, vBB, vCC */
   1914 ENTRY art_quick_mul_long
   1915     push    {r9-r10}
   1916     .cfi_adjust_cfa_offset 8
   1917     .cfi_rel_offset r9, 0
   1918     .cfi_rel_offset r10, 4
   1919     mul     ip, r2, r1                  @  ip<- ZxW
   1920     umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
   1921     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
   1922     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
   1923     mov     r0,r9
   1924     mov     r1,r10
   1925     pop     {r9-r10}
   1926     .cfi_adjust_cfa_offset -8
   1927     .cfi_restore r9
   1928     .cfi_restore r10
   1929     bx      lr
   1930 END art_quick_mul_long
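
         // A hedged C++ equivalent of the routine above (a truncating 64-bit multiply; the
         // variable names follow the W/X/Y/Z diagram):
         //
         //   int64_t MulLong(int64_t a /* r1:r0 = W:X */, int64_t b /* r3:r2 = Y:Z */) {
         //     uint32_t X = (uint32_t)a, W = (uint32_t)(a >> 32);
         //     uint32_t Z = (uint32_t)b, Y = (uint32_t)(b >> 32);
         //     uint64_t zx = (uint64_t)Z * X;                       // umull
         //     uint32_t hi = (uint32_t)(zx >> 32) + Z * W + Y * X;  // mul/mla + add
         //     return (int64_t)(((uint64_t)hi << 32) | (uint32_t)zx);
         //   }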
   1931 
   1932     /*
   1933      * Long integer shift.  This is different from the generic 32/64-bit
   1934      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1935      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1936      * 6 bits.
   1937      * On entry:
   1938      *   r0: low word
   1939      *   r1: high word
   1940      *   r2: shift count
   1941      */
   1942     /* shl-long vAA, vBB, vCC */
   1943 ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
   1944     and     r2, r2, #63                 @ r2<- r2 & 0x3f
   1945     mov     r1, r1, asl r2              @  r1<- r1 << r2
   1946     rsb     r3, r2, #32                 @  r3<- 32 - r2
    1947     orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
   1948     subs    ip, r2, #32                 @  ip<- r2 - 32
   1949     movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
   1950     mov     r0, r0, asl r2              @  r0<- r0 << r2
   1951     bx      lr
   1952 END art_quick_shl_long
   1953 
   1954     /*
   1955      * Long integer shift.  This is different from the generic 32/64-bit
   1956      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1957      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1958      * 6 bits.
   1959      * On entry:
   1960      *   r0: low word
   1961      *   r1: high word
   1962      *   r2: shift count
   1963      */
   1964     /* shr-long vAA, vBB, vCC */
   1965 ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
    1966     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    1967     mov     r0, r0, lsr r2              @  r0<- r0 >> r2
   1968     rsb     r3, r2, #32                 @  r3<- 32 - r2
   1969     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
   1970     subs    ip, r2, #32                 @  ip<- r2 - 32
   1971     movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
   1972     mov     r1, r1, asr r2              @  r1<- r1 >> r2
   1973     bx      lr
   1974 END art_quick_shr_long
   1975 
   1976     /*
   1977      * Long integer shift.  This is different from the generic 32/64-bit
   1978      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1979      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1980      * 6 bits.
   1981      * On entry:
   1982      *   r0: low word
   1983      *   r1: high word
   1984      *   r2: shift count
   1985      */
   1986     /* ushr-long vAA, vBB, vCC */
   1987 ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
    1988     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    1989     mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
   1990     rsb     r3, r2, #32                 @  r3<- 32 - r2
   1991     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
   1992     subs    ip, r2, #32                 @  ip<- r2 - 32
   1993     movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
   1994     mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
   1995     bx      lr
   1996 END art_quick_ushr_long
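
         // A hedged C++ sketch of shl-long above (shr-long and ushr-long are symmetric). The
         // assembly needs no branches: ARM register-specified shifts of 32 or more yield 0, so the
         // `orr` contributes nothing when the distance is 0 or >= 32, and the conditional `movpl`
         // handles distances of 32-63.
         //
         //   uint64_t ShlLong(uint32_t lo, uint32_t hi, uint32_t n) {
         //     n &= 63;
         //     if (n >= 32) {
         //       hi = lo << (n - 32);                // the low word supplies the high word
         //       lo = 0;
         //     } else if (n != 0) {
         //       hi = (hi << n) | (lo >> (32 - n));  // bits carried up from the low word
         //       lo <<= n;
         //     }
         //     return ((uint64_t)hi << 32) | lo;
         //   }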
   1997 
   1998     /*
   1999      * String's indexOf.
   2000      *
   2001      * On entry:
   2002      *    r0:   string object (known non-null)
   2003      *    r1:   char to match (known <= 0xFFFF)
   2004      *    r2:   Starting offset in string data
   2005      */
   2006 ENTRY art_quick_indexof
   2007     push {r4, r10-r11, lr} @ 4 words of callee saves
   2008     .cfi_adjust_cfa_offset 16
   2009     .cfi_rel_offset r4, 0
   2010     .cfi_rel_offset r10, 4
   2011     .cfi_rel_offset r11, 8
   2012     .cfi_rel_offset lr, 12
   2013 #if (STRING_COMPRESSION_FEATURE)
   2014     ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
   2015 #else
   2016     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
   2017 #endif
   2018     add   r0, #MIRROR_STRING_VALUE_OFFSET
   2019 #if (STRING_COMPRESSION_FEATURE)
    2020     /* r4 holds count (with compression flag); r3 holds the actual length */
   2021     lsr   r3, r4, #1
   2022 #endif
   2023     /* Clamp start to [0..count] */
   2024     cmp   r2, #0
   2025     it    lt
   2026     movlt r2, #0
   2027     cmp   r2, r3
   2028     it    gt
   2029     movgt r2, r3
   2030 
   2031     /* Save a copy in r12 to later compute result */
   2032     mov   r12, r0
   2033 
   2034     /* Build pointer to start of data to compare and pre-bias */
   2035 #if (STRING_COMPRESSION_FEATURE)
   2036     lsrs  r4, r4, #1
   2037     bcc   .Lstring_indexof_compressed
   2038 #endif
   2039     add   r0, r0, r2, lsl #1
   2040     sub   r0, #2
   2041 
   2042     /* Compute iteration count */
   2043     sub   r2, r3, r2
   2044 
   2045     /*
   2046      * At this point we have:
   2047      *   r0: start of data to test
   2048      *   r1: char to compare
   2049      *   r2: iteration count
   2050      *   r4: compression style (used temporarily)
   2051      *   r12: original start of string data
   2052      *   r3, r4, r10, r11 available for loading string data
   2053      */
   2054 
   2055     subs  r2, #4
   2056     blt   .Lindexof_remainder
   2057 
   2058 .Lindexof_loop4:
   2059     ldrh  r3, [r0, #2]!
   2060     ldrh  r4, [r0, #2]!
   2061     ldrh  r10, [r0, #2]!
   2062     ldrh  r11, [r0, #2]!
   2063     cmp   r3, r1
   2064     beq   .Lmatch_0
   2065     cmp   r4, r1
   2066     beq   .Lmatch_1
   2067     cmp   r10, r1
   2068     beq   .Lmatch_2
   2069     cmp   r11, r1
   2070     beq   .Lmatch_3
   2071     subs  r2, #4
   2072     bge   .Lindexof_loop4
   2073 
   2074 .Lindexof_remainder:
   2075     adds  r2, #4
   2076     beq   .Lindexof_nomatch
   2077 
   2078 .Lindexof_loop1:
   2079     ldrh  r3, [r0, #2]!
   2080     cmp   r3, r1
   2081     beq   .Lmatch_3
   2082     subs  r2, #1
   2083     bne   .Lindexof_loop1
   2084 
   2085 .Lindexof_nomatch:
   2086     mov   r0, #-1
   2087     pop {r4, r10-r11, pc}
   2088 
   2089 .Lmatch_0:
   2090     sub   r0, #6
   2091     sub   r0, r12
   2092     asr   r0, r0, #1
   2093     pop {r4, r10-r11, pc}
   2094 .Lmatch_1:
   2095     sub   r0, #4
   2096     sub   r0, r12
   2097     asr   r0, r0, #1
   2098     pop {r4, r10-r11, pc}
   2099 .Lmatch_2:
   2100     sub   r0, #2
   2101     sub   r0, r12
   2102     asr   r0, r0, #1
   2103     pop {r4, r10-r11, pc}
   2104 .Lmatch_3:
   2105     sub   r0, r12
   2106     asr   r0, r0, #1
   2107     pop {r4, r10-r11, pc}
   2108 #if (STRING_COMPRESSION_FEATURE)
   2109 .Lstring_indexof_compressed:
   2110     add   r0, r0, r2
   2111     sub   r0, #1
   2112     sub   r2, r3, r2
   2113 .Lstring_indexof_compressed_loop:
   2114     subs  r2, #1
   2115     blt   .Lindexof_nomatch
   2116     ldrb  r3, [r0, #1]!
   2117     cmp   r3, r1
   2118     beq   .Lstring_indexof_compressed_matched
   2119     b     .Lstring_indexof_compressed_loop
   2120 .Lstring_indexof_compressed_matched:
   2121     sub   r0, r12
   2122     pop {r4, r10-r11, pc}
   2123 #endif
   2124 END art_quick_indexof
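
         // A hedged C++ sketch of the uncompressed path above; the actual routine additionally
         // unrolls the scan loop four ways and has a byte-wise loop for compressed strings.
         //
         //   int32_t IndexOf(const uint16_t* chars, int32_t count, uint16_t ch, int32_t start) {
         //     if (start < 0) start = 0;            // clamp start to [0..count]
         //     if (start > count) start = count;
         //     for (int32_t i = start; i < count; ++i) {
         //       if (chars[i] == ch) return i;
         //     }
         //     return -1;
         //   }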
   2125 
   2126     /* Assembly routines used to handle ABI differences. */
   2127 
   2128     /* double fmod(double a, double b) */
   2129     .extern fmod
   2130 ENTRY art_quick_fmod
   2131     push  {lr}
   2132     .cfi_adjust_cfa_offset 4
   2133     .cfi_rel_offset lr, 0
   2134     sub   sp, #4
   2135     .cfi_adjust_cfa_offset 4
   2136     vmov  r0, r1, d0
   2137     vmov  r2, r3, d1
   2138     bl    fmod
   2139     vmov  d0, r0, r1
   2140     add   sp, #4
   2141     .cfi_adjust_cfa_offset -4
   2142     pop   {pc}
   2143 END art_quick_fmod
   2144 
   2145     /* float fmodf(float a, float b) */
   2146      .extern fmodf
   2147 ENTRY art_quick_fmodf
   2148     push  {lr}
   2149     .cfi_adjust_cfa_offset 4
   2150     .cfi_rel_offset lr, 0
   2151     sub   sp, #4
   2152     .cfi_adjust_cfa_offset 4
   2153     vmov  r0, r1, d0
   2154     bl    fmodf
   2155     vmov  s0, r0
   2156     add   sp, #4
   2157     .cfi_adjust_cfa_offset -4
   2158     pop   {pc}
   2159 END art_quick_fmodf
   2160 
   2161     /* int64_t art_d2l(double d) */
   2162     .extern art_d2l
   2163 ENTRY art_quick_d2l
   2164     vmov  r0, r1, d0
   2165     b     art_d2l
   2166 END art_quick_d2l
   2167 
   2168     /* int64_t art_f2l(float f) */
   2169     .extern art_f2l
   2170 ENTRY art_quick_f2l
   2171     vmov  r0, s0
   2172     b     art_f2l
   2173 END art_quick_f2l
   2174 
   2175     /* float art_l2f(int64_t l) */
   2176     .extern art_l2f
   2177 ENTRY art_quick_l2f
   2178     push  {lr}
   2179     .cfi_adjust_cfa_offset 4
   2180     .cfi_rel_offset lr, 0
   2181     sub   sp, #4
   2182     .cfi_adjust_cfa_offset 4
   2183     bl    art_l2f
   2184     vmov  s0, r0
   2185     add   sp, #4
   2186     .cfi_adjust_cfa_offset -4
   2187     pop   {pc}
   2188 END art_quick_l2f
   2189 
   2190 .macro CONDITIONAL_CBZ reg, reg_if, dest
   2191 .ifc \reg, \reg_if
   2192     cbz \reg, \dest
   2193 .endif
   2194 .endm
   2195 
   2196 .macro CONDITIONAL_CMPBZ reg, reg_if, dest
   2197 .ifc \reg, \reg_if
   2198     cmp \reg, #0
   2199     beq \dest
   2200 .endif
   2201 .endm
   2202 
    2203 // Use CBZ if the register is in {r0-r7}; otherwise compare and branch.
   2204 .macro SMART_CBZ reg, dest
   2205     CONDITIONAL_CBZ \reg, r0, \dest
   2206     CONDITIONAL_CBZ \reg, r1, \dest
   2207     CONDITIONAL_CBZ \reg, r2, \dest
   2208     CONDITIONAL_CBZ \reg, r3, \dest
   2209     CONDITIONAL_CBZ \reg, r4, \dest
   2210     CONDITIONAL_CBZ \reg, r5, \dest
   2211     CONDITIONAL_CBZ \reg, r6, \dest
   2212     CONDITIONAL_CBZ \reg, r7, \dest
   2213     CONDITIONAL_CMPBZ \reg, r8, \dest
   2214     CONDITIONAL_CMPBZ \reg, r9, \dest
   2215     CONDITIONAL_CMPBZ \reg, r10, \dest
   2216     CONDITIONAL_CMPBZ \reg, r11, \dest
   2217     CONDITIONAL_CMPBZ \reg, r12, \dest
   2218     CONDITIONAL_CMPBZ \reg, r13, \dest
   2219     CONDITIONAL_CMPBZ \reg, r14, \dest
   2220     CONDITIONAL_CMPBZ \reg, r15, \dest
   2221 .endm
   2222 
   2223     /*
   2224      * Create a function `name` calling the ReadBarrier::Mark routine,
   2225      * getting its argument and returning its result through register
   2226      * `reg`, saving and restoring all caller-save registers.
   2227      *
   2228      * IP is clobbered; `reg` must not be IP.
   2229      *
   2230      * If `reg` is different from `r0`, the generated function follows a
   2231      * non-standard runtime calling convention:
   2232      * - register `reg` is used to pass the (sole) argument of this
   2233      *   function (instead of R0);
   2234      * - register `reg` is used to return the result of this function
   2235      *   (instead of R0);
   2236      * - R0 is treated like a normal (non-argument) caller-save register;
   2237      * - everything else is the same as in the standard runtime calling
   2238      *   convention (e.g. standard callee-save registers are preserved).
   2239      */
   2240 .macro READ_BARRIER_MARK_REG name, reg
   2241 ENTRY \name
   2242     // Null check so that we can load the lock word.
   2243     SMART_CBZ \reg, .Lret_rb_\name
    2244     // Check the lock word for the mark bit; if marked, return. Use IP for scratch since it is blocked.
   2245     ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
   2246     tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
   2247     beq .Lnot_marked_rb_\name
   2248     // Already marked, return right away.
   2249 .Lret_rb_\name:
   2250     bx lr
   2251 
    2252     // Test that both of the forwarding address state bits are 1.
   2253     // Test that both the forwarding state bits are 1.
   2254 #if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
   2255     // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
   2256     // the highest bits and the "forwarding address" state to have all bits set.
   2257 #error "Unexpected lock word state shift or forwarding address state value."
   2258 #endif
   2259     cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
   2260     bhs .Lret_forwarding_address\name

.Lslow_rb_\name:
    // Save IP: the kSaveEverything entrypoint art_quick_resolve_string used to
    // make a tail call here. Currently, saving IP serves only to keep the stack
    // aligned, but we may reintroduce kSaveEverything calls here in the future.
    push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
    .cfi_adjust_cfa_offset 32
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r9, 20
    .cfi_rel_offset ip, 24
    .cfi_rel_offset lr, 28

    .ifnc \reg, r0
      mov   r0, \reg                    @ pass arg1 - obj from `reg`
    .endif

    vpush {s0-s15}                      @ save floating-point caller-save registers
    .cfi_adjust_cfa_offset 64
    bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
    vpop {s0-s15}                       @ restore floating-point registers
    .cfi_adjust_cfa_offset -64

    .ifc \reg, r0                       @ Save the result into \reg's saved stack slot (so that the
      str r0, [sp, #0]                  @ POP below restores it into \reg) or move it directly.
    .else
      .ifc \reg, r1
        str r0, [sp, #4]
      .else
        .ifc \reg, r2
          str r0, [sp, #8]
        .else
          .ifc \reg, r3
            str r0, [sp, #12]
          .else
            .ifc \reg, r4
              str r0, [sp, #16]
            .else
              .ifc \reg, r9
                str r0, [sp, #20]
              .else
                mov \reg, r0
              .endif
            .endif
          .endif
        .endif
      .endif
    .endif

    pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
    .cfi_adjust_cfa_offset -32
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r9
    .cfi_restore ip
    .cfi_restore lr
    bx lr
.Lret_forwarding_address\name:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    bx lr
END \name
.endm

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
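// Note: there are no entrypoints for R12-R15: IP (R12) is clobbered by these
// functions (`reg` must not be IP, see above), and SP, LR and PC cannot hold
// heap references.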

// Helper macros for Baker CC read barrier mark introspection (BRBMI).
.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
    \macro_for_register r0
    \macro_for_register r1
    \macro_for_register r2
    \macro_for_register r3
    \macro_for_reserved_register  // R4 is reserved for the entrypoint address.
    \macro_for_register r5
    \macro_for_register r6
    \macro_for_register r7
    \macro_for_register r8
    \macro_for_register r9
    \macro_for_register r10
    \macro_for_register r11
.endm

.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
    BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
    \macro_for_reserved_register  // IP is reserved.
    \macro_for_reserved_register  // SP is reserved.
    \macro_for_reserved_register  // LR is reserved.
    \macro_for_reserved_register  // PC is reserved.
.endm

.macro BRBMI_RETURN_SWITCH_CASE reg
.Lmark_introspection_return_switch_case_\reg:
    mov     \reg, ip
    bx      lr
.endm

.macro BRBMI_BAD_RETURN_SWITCH_CASE
.Lmark_introspection_return_switch_case_bad:
    BRBMI_BKPT_FILL_4B
.endm

.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
.endm

.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
.endm
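// TBB loads a byte from [base, index] and branches to PC + 2 * byte, which is
// why the offsets above are divided by 2. The return switch table below
// immediately follows its TBB instruction, so the table address equals the PC
// value seen by TBB (the TBB address + 4) and the entries can be encoded as
// (case label - table label) / 2.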

#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
.macro BRBMI_ARRAY_LOAD index_reg
    ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
    b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
    .balign 8                                           // Add padding to 8 bytes.
.endm
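// The load above replays the original array load from generated code: the
// thunk has moved the LDR's base register into IP and the index register
// (scaled by 4, the size of a reference) selects the element, so this is
// roughly `ip = ((uint32_t*)ip)[index_reg]` before marking.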

.macro BRBMI_BKPT_FILL_4B
    bkpt    0
    bkpt    0
.endm

.macro BRBMI_BKPT_FILL_8B
    BRBMI_BKPT_FILL_4B
    BRBMI_BKPT_FILL_4B
.endm

.macro BRBMI_RUNTIME_CALL
    // Note: This macro generates exactly 22 bytes of code. The core register
    // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.
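    // (That is 3 * 2 bytes for the 16-bit PUSH and the two MOVs, plus
    // 4 * 4 bytes for VPUSH, BL, VPOP and the POP, the POP being 32-bit
    // because LR is in its register list.)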

    push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r7, 16
    .cfi_rel_offset lr, 20

    mov     r0, ip                    // Pass the reference.
    vpush {s0-s15}                    // save floating-point caller-save registers
    .cfi_adjust_cfa_offset 64
    bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
    vpop    {s0-s15}                  // restore floating-point registers
    .cfi_adjust_cfa_offset -64
    mov     ip, r0                    // Move reference to ip in preparation for return switch.

    pop     {r0-r3, r7, lr}           // Restore registers.
    .cfi_adjust_cfa_offset -24
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r7
    .cfi_restore lr
.endm

.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
    // If the reference is null, just return it in the right register.
    cmp     ip, #0
    beq     .Lmark_introspection_return\label_suffix
    // Use R4 as temp and check the mark bit of the reference.
    ldr     r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tst     r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
    beq     .Lmark_introspection_unmarked\label_suffix
.Lmark_introspection_return\label_suffix:
.endm

.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
.Lmark_introspection_unmarked\label_suffix:
    // Check if the top two bits are one; if so, this is a forwarding address.
#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
    // the highest bits and the "forwarding address" state to have all bits set.
#error "Unexpected lock word state shift or forwarding address state value."
#endif
    cmp     r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
    bhs     .Lmark_introspection_forwarding_address\label_suffix
.endm

.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
.Lmark_introspection_forwarding_address\label_suffix:
    // Note: This macro generates exactly 6 bytes of code; the branch is near.

    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl     ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b       .Lmark_introspection_return\label_suffix
.endm

.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
    // Load the half of the instruction that contains Rt. Adjust for the Thumb state bit in LR.
    ldrh    r4, [lr, #(-1 + \ldr_offset + 2)]
.endm

.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
    // Load the 16-bit instruction. Adjust for the Thumb state bit in LR.
    ldrh    r4, [lr, #(-1 + \ldr_offset)]
.endm
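// A sketch of the encodings involved: in a 32-bit LDR (encoding T3) the
// destination register Rt sits in bits 12-15 of the second halfword, hence
// the "+ 2" above and the LSR #12 at the use site; in a 16-bit LDR (encoding
// T1) Rt sits in bits 0-2, hence the AND #7.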

.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
    .balign 64
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
    BRBMI_RUNTIME_CALL
    // Load the LDR (or the half of it) that contains Rt.
    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
    b       .Lmark_introspection_extract_register_and_return\label_suffix
    // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
    // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
    // the 6-byte forwarding address extraction here across the 32-byte boundary.
    BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
    // And the slow path, taking exactly 30 bytes (6 bytes for the forwarding
    // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
    // branch), shall take the rest of the 32-byte section (within a cache line).
    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
    BRBMI_RUNTIME_CALL
    b       .Lmark_introspection_return\label_suffix
.endm
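// In total, each expansion above is 28 + 6 + 30 = 64 bytes, matching the
// .balign 64, so that the wide and narrow copies land at the offsets given
// in the layout comment below.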

    /*
     * Use introspection to load a reference from the same address as the LDR
     * instruction in generated code would load (unless loaded by the thunk,
     * see below), call ReadBarrier::Mark() with that reference if needed
     * and return it in the same register as the LDR instruction would load.
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads the LDR instruction in generated code follows
     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
     * knows the holder and performs the gray bit check, returning to the LDR
     * instruction if the object is not gray, so this entrypoint no longer
     * needs to know anything about the holder. For GC root loads, the LDR
     * instruction in generated code precedes the branch to the thunk, i.e. the
     * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
     * where the -1 is again the Thumb mode bit adjustment, and the thunk does
     * not do the gray bit check.
     *
     * For field accesses and array loads with a constant index the thunk loads
     * the reference into IP using introspection and calls the main entrypoint,
     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
     * the passed reference is poisoned.
     *
     * For array accesses with a non-constant index, the thunk inserts bits
     * 0-5 of the LDR instruction into the entrypoint address, effectively
     * calculating a switch case label based on the index register (bits 0-3)
     * and adding an extra offset (bits 4-5 hold the shift, which is always 2
     * for reference loads) to differentiate from the main entrypoint, then
     * moves the base register to IP and jumps to the switch case. Therefore
     * we need to align the main entrypoint to 512 bytes, accounting for
     * a 256-byte offset followed by 16 array entrypoints starting at
     * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
     * (register) and a branch to the main entrypoint.
     *
     * For GC root accesses we cannot use the main entrypoint because of the
     * different offset where the LDR instruction in generated code is located.
     * (And even with heap poisoning enabled, GC roots are not poisoned.)
     * To re-use the same entrypoint pointer in generated code, we make sure
     * that the GC root entrypoint (a copy of the entrypoint with a different
     * offset for introspection loads) is located at a known offset (128 bytes,
     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
     * entrypoint, and the GC root thunk adjusts the entrypoint pointer, moves
     * the root register to IP and jumps to the customized entrypoint,
     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
     * performs all the fast-path checks, so we need just the slow path.
     *
     * The code structure is
     *   art_quick_read_barrier_mark_introspection:
     *     Up to 32 bytes of main entrypoint fast-path code for fields
     *     (and array elements with constant offset) with LDR encoding T3;
     *     jumps to the switch in the "narrow" entrypoint.
     *     Padding to 32 bytes if needed.
     *   art_quick_read_barrier_mark_introspection_narrow:
     *     Up to 48 bytes of fast-path code for fields (and array
     *     elements with constant offset) with LDR encoding T1, ending in the
     *     return switch instruction TBB and the table with switch offsets.
     *     Padding to 80 bytes if needed.
     *   .Lmark_introspection_return_switch_case_r0:
     *     Exactly 48 bytes of code for the return switch cases (12 cases,
     *     including BKPT for the reserved registers).
     *     Ends at 128 bytes total.
     *   art_quick_read_barrier_mark_introspection_gc_roots_wide:
     *     GC root entrypoint code for LDR encoding T3 (28 bytes).
     *     Forwarding address extraction for LDR encoding T3 (6 bytes).
     *     Slow path for main entrypoint for LDR encoding T3 (30 bytes).
     *     Ends at 192 bytes total.
     *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:
     *     GC root entrypoint code for LDR encoding T1 (28 bytes).
     *     Forwarding address extraction for LDR encoding T1 (6 bytes).
     *     Slow path for main entrypoint for LDR encoding T1 (30 bytes).
     *     Ends at 256 bytes total.
     *   art_quick_read_barrier_mark_introspection_arrays:
     *     Exactly 128 bytes for array load switch cases (16x2 instructions).
     */
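    /*
     * A sketch of the array thunk's address arithmetic, derived from the
     * comment above (not code in this file): the main entrypoint is aligned
     * to 512 bytes and each array case is 8 bytes, so inserting bits 0-5 of
     * the LDR (index register Rm in bits 0-3, shift 2 in bits 4-5) at bit
     * position 3 of the entrypoint address gives
     *     target = main_entrypoint + ((insn & 0x3f) << 3)
     *            = main_entrypoint + 256 + 8 * Rm
     * which is exactly the Rm-th case in the 128-byte array section at
     * offset 256.
     */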
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP contains the reference, R4 can be freely used.
    // (R4 is reserved for the entrypoint address.)
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
    // Check for null or marked, lock word is loaded into R4.
    BRBMI_CHECK_NULL_AND_MARKED _wide
    // Load the half of the instruction that contains Rt.
    BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
.Lmark_introspection_extract_register_and_return_wide:
    lsr     r4, r4, #12               // Extract `ref_reg`.
    b       .Lmark_introspection_return_switch

    .balign 32
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_narrow, #function
    .hidden art_quick_read_barrier_mark_introspection_narrow
    .global art_quick_read_barrier_mark_introspection_narrow
art_quick_read_barrier_mark_introspection_narrow:
    // At this point, IP contains the reference, R4 can be freely used.
    // (R4 is reserved for the entrypoint address.)
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
    // Check for null or marked, lock word is loaded into R4.
    BRBMI_CHECK_NULL_AND_MARKED _narrow
    // Load the 16-bit instruction.
    BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
.Lmark_introspection_extract_register_and_return_narrow:
    and     r4, r4, #7                // Extract `ref_reg`.
.Lmark_introspection_return_switch:
    tbb     [pc, r4]                  // Jump to the switch case.
.Lmark_introspection_return_table:
    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
    .balign 16
    BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE

    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow

    .balign 256
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_arrays, #function
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
END art_quick_read_barrier_mark_introspection

.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
    mov     r2, r9                 @ pass Thread::Current
    mov     r3, sp                 @ pass SP
    mov     r0, #0                 @ initialize the 64-bit JValue to zero.
    str     r0, [sp, #-4]!
    .cfi_adjust_cfa_offset 4
    str     r0, [sp, #-4]!
    .cfi_adjust_cfa_offset 4
    mov     r0, sp                 @ pass the JValue for the return result as the first argument.
    bl      artInvokePolymorphic   @ artInvokePolymorphic(JValue, receiver, Thread*, SP)
    sub     r0, 'A'                @ return value is the descriptor of the handle's return type.
    cmp     r0, 'Z' - 'A'          @ check if the value is within bounds of the handler table
    bgt     .Lcleanup_and_return   @ and clean up if not.
    adr     r1, .Lhandler_table
    tbb     [r1, r0]               @ branch to the handler for the return value based on its type.
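    @ For example, a method handle returning double has descriptor 'D', so
    @ here r0 == 'D' - 'A' == 3 and the TBB above loads byte 3 of
    @ .Lhandler_table, branching to .Lstore_double_result.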

.Lstart_of_handlers:
.Lstore_boolean_result:
    ldrb    r0, [sp]               @ Copy boolean value to return value of this function.
    b       .Lcleanup_and_return
.Lstore_char_result:
    ldrh    r0, [sp]               @ Copy char value to return value of this function.
    b       .Lcleanup_and_return
.Lstore_float_result:
    vldr    s0, [sp]               @ Copy float value from JValue result to the context restored by
    vstr    s0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    b       .Lcleanup_and_return
.Lstore_double_result:
    vldr    d0, [sp]               @ Copy double value from JValue result to the context restored by
    vstr    d0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    b       .Lcleanup_and_return
.Lstore_long_result:
    ldr     r1, [sp, #4]           @ Copy the upper bits from JValue result to the context restored by
    str     r1, [sp, #80]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    // Fall-through for lower bits.
.Lstore_int_result:
    ldr     r0, [sp]               @ Copy int value to return value of this function.
    // Fall-through to clean up and return.
.Lcleanup_and_return:
    add     sp, #8
    .cfi_adjust_cfa_offset -8
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2

.macro HANDLER_TABLE_OFFSET handler_label
    .byte (\handler_label - .Lstart_of_handlers) / 2
.endm

.Lhandler_table:
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // L (object)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
    HANDLER_TABLE_OFFSET(.Lstore_int_result)      // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
.purgem HANDLER_TABLE_OFFSET
END art_quick_invoke_polymorphic