      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "asm_support_arm.S"
     18 #include "interpreter/cfi_asm_support.h"
     19 
     20 #include "arch/quick_alloc_entrypoints.S"
     21 
     22     /* Deliver the given exception */
     23     .extern artDeliverExceptionFromCode
     24     /* Deliver an exception pending on a thread */
     25     .extern artDeliverPendingException
     26 
     27     /*
     28      * Macro to spill the GPRs.
     29      */
     30 .macro SPILL_ALL_CALLEE_SAVE_GPRS
     31     push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
     32     .cfi_adjust_cfa_offset 36
     33     .cfi_rel_offset r4, 0
     34     .cfi_rel_offset r5, 4
     35     .cfi_rel_offset r6, 8
     36     .cfi_rel_offset r7, 12
     37     .cfi_rel_offset r8, 16
     38     .cfi_rel_offset r9, 20
     39     .cfi_rel_offset r10, 24
     40     .cfi_rel_offset r11, 28
     41     .cfi_rel_offset lr, 32
     42 .endm
     43 
     44     /*
     45      * Macro that sets up the callee save frame to conform with
     46      * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     47      */
     48 .macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
     49     SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
     50     vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
     51     .cfi_adjust_cfa_offset 64
     52     sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
     53     .cfi_adjust_cfa_offset 12
     54     RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
     55     @ Load kSaveAllCalleeSaves Method* into rTemp.
     56     ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
     57     str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
     58     str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
     59 
     60      // Ugly compile-time check, but we only have the preprocessor.
     61 #if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
     62 #error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
     63 #endif
     64 .endm
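
    /*
     * Resulting frame layout, sketched from the pushes above (an editor's illustration;
     * offsets are from the new SP and carry no contract beyond the #if check above):
     *   [sp, #0]   ArtMethod* (kSaveAllCalleeSaves runtime method)
     *   [sp, #4]   2 words of padding
     *   [sp, #12]  s16-s31 (64 bytes)
     *   [sp, #76]  r4-r11, lr (36 bytes, lr at [sp, #108])
     * Total: 112 bytes = 36 + 64 + 12, matching FRAME_SIZE_SAVE_ALL_CALLEE_SAVES.
     */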

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                    @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsOnly Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
#endif
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME
    add sp, #4               @ bottom word holds Method*
    .cfi_adjust_cfa_offset -4
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r5-r8, r10-r11, lr} @ 7 words of callee saves
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -28
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r1-r3, r5-r8, r10-r11, lr}   @ 10 words of callee saves and args.
    .cfi_adjust_cfa_offset 40
    .cfi_rel_offset r1, 0
    .cfi_rel_offset r2, 4
    .cfi_rel_offset r3, 8
    .cfi_rel_offset r5, 12
    .cfi_rel_offset r6, 16
    .cfi_rel_offset r7, 20
    .cfi_rel_offset r8, 24
    .cfi_rel_offset r10, 28
    .cfi_rel_offset r11, 32
    .cfi_rel_offset lr, 36
    vpush {s0-s15}                     @ 16 words of float args.
    .cfi_adjust_cfa_offset 64
    sub sp, #8                         @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
#endif
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    add  sp, #8                      @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {s0-s15}
    .cfi_adjust_cfa_offset -64
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r1-r3, r5-r8, r10-r11, lr}  @ 10 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -40
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when core registers are already saved.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
                                        @ 14 words of callee saves and args already saved.
    vpush {d0-d15}                      @ 32 words, 2 for each of the 16 saved doubles.
    .cfi_adjust_cfa_offset 128
    sub sp, #8                          @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    RUNTIME_CURRENT1 \rTemp             @ Load Runtime::Current into rTemp.
    @ Load kSaveEverything Method* into rTemp.
    ldr \rTemp, [\rTemp, #\runtime_method_offset]
    str \rTemp, [sp, #0]                @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything).
     */
.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    push {r0-r12, lr}                   @ 14 words of callee saves and args.
    .cfi_adjust_cfa_offset 56
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48
    .cfi_rel_offset lr, 52
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    pop {r0-r12, lr}                    @ 14 words of callee saves
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -56
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    add  sp, #8                         @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    add  sp, #4                         @ skip r0
    .cfi_adjust_cfa_offset -4
    .cfi_restore r0                     @ debugger can no longer restore caller's r0
    pop {r1-r12, lr}                    @ 13 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -52
.endm

// Macro to refresh the Marking Register (R8).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm
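
// Roughly equivalent C for the load above (a sketch; assumes rMR/rSELF are the R8/R9
// aliases established in asm_support_arm.S, consistent with their use throughout this file):
//   if (kUseBakerReadBarrier) rMR = self->GetIsGcMarking();  // self lives in R9.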

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz   r0, 1f              @ result non-zero branch over
    bx     lr                  @ return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz    r0, 1f              @ result zero branch over
    bx     lr                  @ return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov    r0, r9                              @ pass Thread::Current
    bl     artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save all registers as basis for long jump context
    mov r0, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r0  @ save all registers as basis for long jump context
    mov r0, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1       @ save all registers as basis for long jump context
    mov r1, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r2  @ save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    bl  \cxx_name                   @ \cxx_name(Thread*)
END \c_name
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [r9, #THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    bx lr
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
.endm

.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_NON_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

// Macros that take advantage of the code similarities among the downcalls.
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1        @ save callee saves in case of GC
    mov    r1, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (uint32_t field_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r2, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (field_idx, Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3        @ save callee saves in case of GC
    mov    r3, r9                        @ pass Thread::Current
    bl     \entrypoint                   @ (field_idx, Object*, new_val, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artDeliverExceptionFromCode,
     * which will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    push {r0-r12}                   @ 13 words of callee saves and args; LR already saved.
    .cfi_adjust_cfa_offset 52
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48

    @ save all registers as basis for long jump context
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
    mov r0, lr                      @ pass the fault address stored in LR by the fault handler.
    mov r1, r9                      @ pass Thread::Current
    bl  artThrowNullPointerExceptionFromSignal  @ (fault_address, Thread*)
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Clobbers IP (R12).
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2     @ save callee saves in case allocation triggers GC
    mov    r2, r9                         @ pass Thread::Current
    mov    r3, sp
    bl     \cxx_name                      @ (method_idx, this, Thread*, SP)
    mov    r12, r1                        @ save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz    r0, 1f                         @ did we find the target? if not go to exception delivery
    bx     r12                            @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm
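
    /*
     * For reference, a C-level sketch of the helper contract described above. This is an
     * editor's illustration only; the real declarations live in the runtime's entrypoint headers:
     *
     *   extern "C" uint64_t artInvokeInterfaceTrampolineWithAccessCheck(
     *       uint32_t method_idx, mirror::Object* this_object, Thread* self, ArtMethod** sp);
     *
     * The low word (r0) carries the resolved Method* and the high word (r1) carries
     * method->code_; null/null signals failure with a pending exception on the thread.
     */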

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   JValue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |          r9             |
     *  |          r4             | <- r11
     *  +-------------------------+
     *  | uint32_t out[n-1]       |
     *  |    :      :             |        Outs
     *  | uint32_t out[0]         |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS             @ spill regs (9)
    mov    r11, sp                         @ save the stack pointer
    .cfi_def_cfa_register r11

    mov    r9, r3                          @ move managed thread pointer into r9

    add    r4, r2, #4                      @ create space for method pointer in frame
    sub    r4, sp, r4                      @ reserve & align *stack* to 16 bytes: native calling
    and    r4, #0xFFFFFFF0                 @ convention only aligns to 8B, so we have to ensure ART
    mov    sp, r4                          @ 16B alignment ourselves.

    mov    r4, r0                          @ save method*
    add    r0, sp, #4                      @ pass stack pointer + method ptr as dest for memcpy
    bl     memcpy                          @ memcpy (dest, src, bytes)
    mov    ip, #0                          @ set ip to 0
    str    ip, [sp]                        @ store null for method* at bottom of frame

    ldr    ip, [r11, #48]                  @ load fp register argument array pointer
    vldm   ip, {s0-s15}                    @ copy s0 - s15

    ldr    ip, [r11, #44]                  @ load core register argument array pointer
    mov    r0, r4                          @ restore method*
    add    ip, ip, #4                      @ skip r0
    ldm    ip, {r1-r3}                     @ copy r1 - r3

    REFRESH_MARKING_REGISTER

    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
    blx    ip                              @ call the method

    mov    sp, r11                         @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr    r4, [sp, #40]                   @ load result_is_float
    ldr    r9, [sp, #36]                   @ load the result pointer
    cmp    r4, #0
    ite    eq
    strdeq r0, [r9]                        @ store r0/r1 into result pointer
    vstrne d0, [r9]                        @ store s0-s1/d0 into result pointer

    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}               @ restore spill regs
END art_quick_invoke_stub_internal
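
    /*
     * C-like pseudocode for the stub above (an editor's sketch, derived from the
     * register/stack layout documented before ENTRY; not authoritative):
     *
     *   memcpy(sp + 4, args, size);          // out-args above a null ArtMethod* slot
     *   s0..s15 = fp_reg_args[0..15];        // vldm
     *   r1..r3  = core_reg_args[1..3];       // ldm; r0 is restored to the ArtMethod*
     *   (*method->quick_code)(...);          // blx ip
     *   if (result_in_float) *result = d0; else *result = r0:r1;
     */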

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS             @ Spill regs (9)
    SAVE_SIZE=9*4
    mov    r11, sp                         @ Save the stack pointer
    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
    .cfi_remember_state
    mov    r10, r1                         @ Save size of stack
    ldr    r9, [r11, #40]                  @ Move managed thread pointer into r9
    REFRESH_MARKING_REGISTER
    mov    r6, r2                          @ Save the pc to call
    sub    r7, sp, #12                     @ Reserve space for stack pointer,
                                           @    JValue* result, and ArtMethod* slot.
    and    r7, #0xFFFFFFF0                 @ Align stack pointer
    mov    sp, r7                          @ Update stack pointer
    str    r11, [sp, #4]                   @ Save old stack pointer
    str    r3, [sp, #8]                    @ Save JValue* result
    mov    ip, #0
    str    ip, [sp]                        @ Store null for ArtMethod* at bottom of frame
    // r11 isn't properly spilled in the osr method, so we need to use a DWARF expression.
    // NB: the CFI must be before the call since this is the address gdb will look up.
    // NB: gdb expects that cfa_expression returns the CFA value (not an address pointing to it).
    .cfi_escape                            /* CFA = [sp + 4] + SAVE_SIZE */ \
      0x0f, 6,                             /* DW_CFA_def_cfa_expression(len) */ \
      0x92, 13, 4,                         /* DW_OP_bregx(reg,offset) */ \
      0x06,                                /* DW_OP_deref */ \
      0x23, SAVE_SIZE                      /* DW_OP_plus_uconst(val) */
    bl     .Losr_entry                     @ Call the method
    ldr    r10, [sp, #8]                   @ Restore JValue* result
    ldr    sp, [sp, #4]                    @ Restore saved stack pointer
    .cfi_def_cfa sp, SAVE_SIZE             @ CFA = sp + SAVE_SIZE
    ldr    r4, [sp, #36]                   @ load shorty
    ldrb   r4, [r4, #0]                    @ load return type
    cmp    r4, #68                         @ Test if result type char == 'D'.
    beq    .Losr_fp_result
    cmp    r4, #70                         @ Test if result type char == 'F'.
    beq    .Losr_fp_result
    strd r0, [r10]                         @ Store r0/r1 into result pointer
    b    .Losr_exit
.Losr_fp_result:
    vstr d0, [r10]                         @ Store s0-s1/d0 into result pointer
.Losr_exit:
    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    .cfi_restore_state
    .cfi_def_cfa r11, SAVE_SIZE            @ CFA = r11 + SAVE_SIZE
    sub sp, sp, r10                        @ Reserve space for callee stack
    sub r10, r10, #4
    str lr, [sp, r10]                      @ Store link register per the compiler ABI
    mov r2, r10
    mov r1, r0
    mov r0, sp
    bl  memcpy                             @ memcpy (dest r0, src r1, bytes r2)
    bx r6
END art_quick_osr_stub

    /*
     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
     */
ARM_ENTRY art_quick_do_long_jump
    vldm r1, {s0-s31}     @ load all fprs from argument fprs_
    ldr  r2, [r0, #60]    @ r2 = r15 (PC from gprs_ 60=4*15)
    ldr  r14, [r0, #56]   @ (LR from gprs_ 56=4*14)
    add  r0, r0, #12      @ increment r0 to skip gprs_[0..2] 12=4*3
    ldm  r0, {r3-r13}     @ load remaining gprs from argument gprs_
    REFRESH_MARKING_REGISTER
    ldr  r0, [r0, #-12]   @ load r0 value
    mov  r1, #0           @ clear result register r1
    bx   r2               @ do long jump
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
     * possibly null object to lock.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    cbz    r0, .Lslow_lock
.Lretry_lock:
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    mov    r3, r1
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    cbnz   r3, .Lnot_unlocked         @ already thin locked
    @ unlocked case - r1: original lock word that's zero except for the read barrier bits.
    orr    r2, r1, r2                 @ r2 holds thread id with count of 0 with preserved read barrier bits
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    cbnz   r3, .Llock_strex_fail      @ store failed, retry
    dmb    ish                        @ full (LoadLoad|LoadStore) memory barrier
    bx lr
.Lnot_unlocked:  @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits
    lsr    r3, r1, LOCK_WORD_STATE_SHIFT
    cbnz   r3, .Lslow_lock            @ if either of the top two bits are set, go slow path
    eor    r2, r1, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r2, r2                     @ zero top 16 bits
    cbnz   r2, .Lslow_lock            @ if thread ids don't match, it's contention: go slow path
                                      @ else they match -> recursive lock, fall through
    mov    r3, r1                     @ copy the lock word to check count overflow.
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits.
    add    r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count in lock word placing in r2 to check overflow
    lsr    r3, r2, #LOCK_WORD_GC_STATE_SHIFT    @ if the first gc state bit is set, we overflowed.
    cbnz   r3, .Lslow_lock            @ if we overflow the count go slow path
    add    r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ increment count for real
    strex  r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits
    cbnz   r3, .Llock_strex_fail      @ strex failed, retry
    bx lr
.Llock_strex_fail:
    b      .Lretry_lock               @ retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object
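
    /*
     * Pseudocode for the thin-lock fast path above (an editor's sketch; field and constant
     * names are illustrative, the real definitions come from the runtime's lock word layout):
     *
     *   lw = obj->monitor_;                                       // ldrex
     *   if ((lw & ~gc_bits) == 0) {                               // unlocked
     *     if (strex(obj->monitor_, lw | self->thread_id_)) retry; // claim with count 0
     *     dmb ish; return;                                        // acquire barrier
     *   }
     *   if (state(lw) != kThinLocked) goto slow_path;             // fat lock / hash code
     *   if (owner(lw) != self->thread_id_) goto slow_path;        // contention
     *   if (count(lw) + 1 would spill into the gc bits) goto slow_path;
     *   if (strex(obj->monitor_, lw + kCountOne)) retry;          // recursive lock
     */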

ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case we block
    mov    r1, r9                     @ pass Thread::Current
    bl     artLockObjectFromCode      @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * r0 holds the possibly null object to unlock.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    r0, .Lslow_unlock
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    ldrex  r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ Need to use atomic instructions for read barrier
#endif
    lsr    r2, r1, #LOCK_WORD_STATE_SHIFT
    cbnz   r2, .Lslow_unlock          @ if either of the top two bits are set, go slow path
    ldr    r2, [r9, #THREAD_ID_OFFSET]
    mov    r3, r1                     @ copy lock word to check thread id equality
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    eor    r3, r3, r2                 @ lock_word.ThreadId() ^ self->ThreadId()
    uxth   r3, r3                     @ zero top 16 bits
    cbnz   r3, .Lslow_unlock          @ if thread ids don't match, go slow path
    mov    r3, r1                     @ copy lock word to detect transition to unlocked
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  @ zero the gc bits
    cmp    r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    @ transition to unlocked
    mov    r3, r1
    and    r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED  @ r3: zero except for the preserved gc bits
    dmb    ish                        @ full (LoadStore|StoreStore) memory barrier
#ifndef USE_READ_BARRIER
    str    r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lrecursive_thin_unlock:  @ r1: original lock word
    sub    r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  @ decrement count
#ifndef USE_READ_BARRIER
    str    r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
#else
    strex  r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]  @ strex necessary for read barrier bits
    cbnz   r2, .Lunlock_strex_fail    @ store failed, retry
#endif
    bx     lr
.Lunlock_strex_fail:
    b      .Lretry_unlock             @ retry
.Lslow_unlock:
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object
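
    /*
     * Matching sketch of the unlock fast path above (illustrative names as in the lock sketch):
     *
     *   lw = obj->monitor_;                                // ldr, or ldrex with read barriers
     *   if (state(lw) != kThinLocked) goto slow_path;
     *   if (owner(lw) != self->thread_id_) goto slow_path;
     *   if (count(lw) == 0) { dmb ish; obj->monitor_ = gc_bits(lw); }   // release barrier
     *   else obj->monitor_ = lw - kCountOne;               // recursive unlock
     */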

ENTRY art_quick_unlock_object_no_inline
    @ save callee saves in case exception allocation triggers GC
    SETUP_SAVE_REFS_ONLY_FRAME r1
    mov    r1, r9                     @ pass Thread::Current
    bl     artUnlockObjectFromCode    @ (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_unlock_object_no_inline

    /*
     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
     * artThrowClassCastExceptionForObject.
     */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    bl artInstanceOfFromCode
    cbz    r0, .Lthrow_class_cast_exception
    pop {r0-r2, pc}

.Lthrow_class_cast_exception:
    pop {r0-r2, lr}
    .cfi_adjust_cfa_offset -16
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
    mov r2, r9                      @ pass Thread::Current
    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
    bkpt
END art_quick_check_instance_of

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
        ldr \rReg, [sp, #\offset]   @ restore rReg
        .cfi_restore \rReg
    .endif
.endm

// Save rReg's value to [sp, #offset].
.macro PUSH_REG rReg, offset
    str \rReg, [sp, #\offset]       @ save rReg
    .cfi_rel_offset \rReg, \offset
.endm

    /*
     * Macro to insert read barrier, only used in art_quick_aput_obj.
     * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
     * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
     */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    push {r0-r3, ip, lr}            @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                      @ push padding
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                 @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
        mov r1, \rObj               @ pass rObj
    .endif
    mov r2, #\offset                @ pass offset
    bl artReadBarrierSlow           @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
        mov \rDest, r0              @ save return value in rDest
    .endif
    add sp, #8                      @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest        @ conditionally restore saved registers
    POP_REG_NE r1, 4, \rDest
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                        @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
#else
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm
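
// C-level sketch of the slow-path call made by READ_BARRIER above (illustrative; the real
// declaration lives with the runtime's read barrier entrypoints):
//   mirror::Object* artReadBarrierSlow(mirror::Object* ref, mirror::Object* obj, uint32_t offset);
// With USE_READ_BARRIER undefined the macro degenerates to rDest = *(rObj + offset),
// plus heap reference unpoisoning.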

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
#ifdef USE_READ_BARRIER
    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
    tst r2, r2
    beq .Ldo_aput_null
#else
    cbz r2, .Ldo_aput_null
#endif  // USE_READ_BARRIER
    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET
    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET
    cmp r3, ip  @ value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Ldo_aput_null:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr
.Lcheck_assignability:
    push {r0-r2, lr}             @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    mov r1, ip
    mov r0, r3
    bl artIsAssignableFromCode
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Lthrow_array_store_exception:
    pop {r0-r2, lr}
    /* No need to repeat restore cfi directives, the ones above apply here. */
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
    mov r1, r2
    mov r2, r9                     @ pass Thread::Current
    bl artThrowArrayStoreException @ (Class*, Class*, Thread*)
    bkpt                           @ unreached
END art_quick_aput_obj

// Macro to facilitate adding new allocation entrypoints.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1     @ save callee saves in case of GC
    mov    r1, r9                     @ pass Thread::Current
    bl     \entrypoint     @ (uint32_t arg0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
    mov    r2, r9                     @ pass Thread::Current
    bl     \entrypoint     @ (uint32_t type_idx, Method* method, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new array allocation entrypoints.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3     @ save callee saves in case of GC
    mov    r3, r9                     @ pass Thread::Current
    @ (uint32_t type_idx, Method* method, int32_t component_count, Thread*)
    bl     \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r12    @ save callee saves in case of GC
    str    r9, [sp, #-16]!            @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     \entrypoint
    add    sp, #16                    @ strip the extra frame
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro for string and type resolution and initialization.
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset    @ save everything in case of GC
    mov    r1, r9                     @ pass Thread::Current
    bl     \entrypoint                @ (uint32_t index, Thread*)
    cbz    r0, 1f                     @ If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    REFRESH_MARKING_REGISTER
    bx     lr
    .cfi_restore_state
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

    /*
     * Called by managed code to resolve a static field and load a non-wide value.
     */
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve a static field and load a 64-bit primitive value.
     */
    .extern artGet64StaticFromCompiledCode
ENTRY art_quick_get64_static
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r1, r9                        @ pass Thread::Current
    bl     artGet64StaticFromCompiledCode        @ (uint32_t field_idx, Thread*)
    ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz   r2, 1f                        @ if an exception is pending, deliver it
    bx     lr                            @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_static

    /*
     * Called by managed code to resolve an instance field and load a non-wide value.
     */
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    /*
     * Called by managed code to resolve an instance field and load a 64-bit primitive value.
     */
    .extern artGet64InstanceFromCompiledCode
ENTRY art_quick_get64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r2        @ save callee saves in case of GC
    mov    r2, r9                        @ pass Thread::Current
    bl     artGet64InstanceFromCompiledCode      @ (field_idx, Object*, Thread*)
    ldr    r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
   1089     REFRESH_MARKING_REGISTER
    cbnz   r2, 1f                        @ if an exception is pending, deliver it
    bx     lr                            @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_instance

    /*
     * Called by managed code to resolve a static field and store a value.
     */
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a non-wide value.
     */
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Called by managed code to resolve an instance field and store a wide value.
     */
    .extern artSet64InstanceFromCompiledCode
ENTRY art_quick_set64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r12       @ save callee saves in case of GC
                                         @ r2:r3 contain the wide argument
    str    r9, [sp, #-16]!               @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64InstanceFromCompiledCode      @ (field_idx, Object*, new_val, Thread*)
    add    sp, #16                       @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME         @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_instance

    .extern artSet64StaticFromCompiledCode
ENTRY art_quick_set64_static
    SETUP_SAVE_REFS_ONLY_FRAME r12        @ save callee saves in case of GC
                                          @ r2:r3 contain the wide argument
    str    r9, [sp, #-16]!                @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl     artSet64StaticFromCompiledCode @ (field_idx, new_val, Thread*)
    add    sp, #16                        @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME          @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_static

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm specific asm.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

    1172 // A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_rosalloc, RosAlloc).
   1173 //
   1174 // If isInitialized=1 then the compiler assumes the object's class has already been initialized.
   1175 // If isInitialized=0 the compiler can only assume it's been at least resolved.
   1176 .macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
   1177 ENTRY \c_name
   1178     // Fast path rosalloc allocation.
   1179     // r0: type/return value, r9: Thread::Current
   1180     // r1, r2, r3, r12: free.
   1181     ldr    r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]     // Check if the thread local
   1182                                                               // allocation stack has room.
   1183                                                               // TODO: consider using ldrd.
   1184     ldr    r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
   1185     cmp    r3, r12
   1186     bhs    .Lslow_path\c_name
   1187 
   1188     ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
   1189     cmp    r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
   1190                                                               // local allocation. Also does the
   1191                                                               // initialized and finalizable checks.
   1192     // When isInitialized == 0, then the class is potentially not yet initialized.
   1193     // If the class is not yet initialized, the object size will be very large to force the branch
   1194     // below to be taken.
   1195     //
   1196     // See InitializeClassVisitors in class-inl.h for more details.
   1197     bhs    .Lslow_path\c_name
   1198                                                               // Compute the rosalloc bracket index
   1199                                                               // from the size. Since the size is
   1200                                                               // already aligned we can combine the
   1201                                                               // two shifts together.
   1202     add    r12, r9, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
    1203                                                               // Subtract pointer size since there
   1204                                                               // are no runs for 0 byte allocations
   1205                                                               // and the size is already aligned.
   1206                                                               // Load the rosalloc run (r12)
   1207     ldr    r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
   1208                                                               // Load the free list head (r3). This
   1209                                                               // will be the return val.
   1210     ldr    r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1211     cbz    r3, .Lslow_path\c_name
   1212     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
   1213     ldr    r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
   1214                                                               // and update the list head with the
   1215                                                               // next pointer.
   1216     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1217                                                               // Store the class pointer in the
   1218                                                               // header. This also overwrites the
   1219                                                               // next pointer. The offsets are
   1220                                                               // asserted to match.
   1221 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
   1222 #error "Class pointer needs to overwrite next pointer."
   1223 #endif
   1224     POISON_HEAP_REF r0
   1225     str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
   1226                                                               // Push the new object onto the thread
   1227                                                               // local allocation stack and
   1228                                                               // increment the thread local
   1229                                                               // allocation stack top.
   1230     ldr    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1231     str    r3, [r1], #COMPRESSED_REFERENCE_SIZE               // (Increment r1 as a side effect.)
   1232     str    r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1233                                                               // Decrement the size of the free list
   1234 
    1235     // After this "STR" the object is published to the thread local allocation stack,
    1236     // and it will be observable from a runtime internal (e.g. Heap::VisitObjects) point of view.
    1237     // It is not yet visible to the running (user) compiled code until after the return.
    1238     //
    1239     // To avoid the memory barrier prior to the "STR", a trick is employed by differentiating
    1240     // the state of the allocation stack slot. It can be a pointer to one of:
    1241     // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    1242     //       (The stack's initial state is all null pointers.)
    1243     // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    1244     // 2) A fully valid object, with a valid class pointer pointing to a real class.
    1245     // Other states are not allowed.
    1246     //
    1247     // Such an object is invalid only temporarily; it will eventually become valid.
    1248     // The internal runtime code simply checks whether the object is null or only partially
    1249     // valid and, if so, ignores it.
    1250     //
    1251     // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    1252     // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    1253     // "next" pointer is non-cyclic.)
    1254     //
    1255     // See also b/28790624 for a listing of CLs dealing with this race.
   1256     ldr    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1257     sub    r1, #1
   1258                                                               // TODO: consider combining this store
   1259                                                               // and the list head store above using
   1260                                                               // strd.
   1261     str    r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1262 
   1263     mov    r0, r3                                             // Set the return value and return.
   1264 .if \isInitialized == 0
   1265     // This barrier is only necessary when the allocation also requires
   1266     // a class initialization check.
   1267     //
   1268     // If the class is already observably initialized, then new-instance allocations are protected
   1269     // from publishing by the compiler which inserts its own StoreStore barrier.
   1270     dmb    ish
   1271     // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
   1272     // they should happen-after the implicit initialization check.
   1273     //
   1274     // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
   1275     // a new observably-initialized class state.
   1276 .endif
   1277     bx     lr
   1278 
   1279 .Lslow_path\c_name:
   1280     SETUP_SAVE_REFS_ONLY_FRAME r2     @ save callee saves in case of GC
   1281     mov    r1, r9                     @ pass Thread::Current
   1282     bl     \cxx_name                  @ (mirror::Class* cls, Thread*)
   1283     RESTORE_SAVE_REFS_ONLY_FRAME
   1284     REFRESH_MARKING_REGISTER
   1285     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1286 END \c_name
   1287 .endm
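// A C-like sketch of the rosalloc fast path implemented by the macro above,
// assuming the THREAD_*/ROSALLOC_* offsets name the fields shown (the helper
// and field names here are illustrative, not runtime API):
//
//   void* AllocRosAllocFastPath(Class* cls, Thread* self) {
//     if (self->alloc_stack_top >= self->alloc_stack_end) return SlowPath(cls, self);
//     uint32_t size = cls->object_size_alloc_fast_path;  // "huge" if not initialized
//     if (size >= ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE) return SlowPath(cls, self);
//     // Size is aligned; bracket 0 holds the smallest non-zero size, hence the -1.
//     Run* run = self->rosalloc_runs[(size >> ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT) - 1];
//     Slot* slot = run->free_list.head;
//     if (slot == NULL) return SlowPath(cls, self);
//     run->free_list.head = slot->next;   // pop the free list
//     slot->klass = cls;                  // overwrites 'next'; offsets asserted equal
//     *self->alloc_stack_top++ = slot;    // publish to the allocation stack
//     run->free_list.size--;
//     return slot;
//   }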
   1288 
   1289 ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
   1290 ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1
   1291 
   1292 // The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
   1293 // and art_quick_alloc_object_resolved/initialized_region_tlab.
   1294 //
    1295 // r0: type, r9: Thread::Current; r1, r2, r3, r12: free.
    1296 // Need to preserve r0 for the slow path.
   1297 //
   1298 // If isInitialized=1 then the compiler assumes the object's class has already been initialized.
   1299 // If isInitialized=0 the compiler can only assume it's been at least resolved.
   1300 .macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
   1301                                                              // Load thread_local_pos (r12) and
   1302                                                              // thread_local_end (r3) with ldrd.
   1303                                                              // Check constraints for ldrd.
   1304 #if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
    1305 #error "Thread::thread_local_pos/end must be consecutive and 8-byte aligned for performance"
   1306 #endif
   1307     ldrd   r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
   1308     sub    r12, r3, r12                                       // Compute the remaining buf size.
   1309     ldr    r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
   1310     cmp    r3, r12                                            // Check if it fits.
   1311     // When isInitialized == 0, then the class is potentially not yet initialized.
   1312     // If the class is not yet initialized, the object size will be very large to force the branch
   1313     // below to be taken.
   1314     //
   1315     // See InitializeClassVisitors in class-inl.h for more details.
   1316     bhi    \slowPathLabel
   1317     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    1318                                                               // Reload old thread_local_pos (r2)
    1319                                                               // for the return value.
   1320     ldr    r2, [r9, #THREAD_LOCAL_POS_OFFSET]
   1321     add    r1, r2, r3
   1322     str    r1, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
    1323     // After this "STR" the object is published to the thread local allocation stack,
    1324     // and it will be observable from a runtime internal (e.g. Heap::VisitObjects) point of view.
    1325     // It is not yet visible to the running (user) compiled code until after the return.
    1326     //
    1327     // To avoid the memory barrier prior to the "STR", a trick is employed by differentiating
    1328     // the state of the object. It can be either:
    1329     // 1) A partially valid object, with a null class pointer
    1330     //       (because the initial state of TLAB buffers is all 0s/nulls).
    1331     // 2) A fully valid object, with a valid class pointer pointing to a real class.
    1332     // Other states are not allowed.
    1333     //
    1334     // Such an object is invalid only temporarily; it will eventually become valid.
    1335     // The internal runtime code simply checks whether the object is null or only partially
    1336     // valid and, if so, ignores it.
    1337     //
    1338     // (Note: The actual check is done by checking that the object's class pointer is non-null.
    1339     // Also, unlike rosalloc, the object can never be observed as null.)
   1340     ldr    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
   1341     add    r1, r1, #1
   1342     str    r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
   1343     POISON_HEAP_REF r0
   1344     str    r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
   1345                                                               // Fence. This is "ish" not "ishst" so
   1346                                                               // that the code after this allocation
   1347                                                               // site will see the right values in
   1348                                                               // the fields of the class.
   1349     mov    r0, r2
   1350 .if \isInitialized == 0
   1351     // This barrier is only necessary when the allocation also requires
   1352     // a class initialization check.
   1353     //
   1354     // If the class is already observably initialized, then new-instance allocations are protected
   1355     // from publishing by the compiler which inserts its own StoreStore barrier.
   1356     dmb    ish
   1357     // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
   1358     // they should happen-after the implicit initialization check.
   1359     //
   1360     // TODO: Remove dmb for class initialization checks (b/36692143)
   1361 .endif
   1362     bx     lr
   1363 .endm
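// The TLAB fast path above is plain bump-pointer allocation; a C-like sketch
// (field and helper names are illustrative):
//
//   void* AllocTlabFastPath(Class* cls, Thread* self) {
//     uint32_t size = cls->object_size_alloc_fast_path;  // "huge" if not initialized
//     if (size > (uint32_t)(self->tlab_end - self->tlab_pos)) return SlowPath(cls, self);
//     Object* obj = (Object*)self->tlab_pos;
//     self->tlab_pos += size;   // bump the thread-local position
//     self->tlab_objects++;
//     obj->klass = cls;         // publish: the class pointer is written last
//     return obj;
//   }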
   1364 
   1365 // The common code for art_quick_alloc_object_*region_tlab
   1366 .macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
   1367 ENTRY \name
   1368     // Fast path tlab allocation.
   1369     // r0: type, r9: Thread::Current
   1370     // r1, r2, r3, r12: free.
   1371     ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
   1372 .Lslow_path\name:
   1373     SETUP_SAVE_REFS_ONLY_FRAME r2                             // Save callee saves in case of GC.
   1374     mov    r1, r9                                             // Pass Thread::Current.
   1375     bl     \entrypoint                                        // (mirror::Class* klass, Thread*)
   1376     RESTORE_SAVE_REFS_ONLY_FRAME
   1377     REFRESH_MARKING_REGISTER
   1378     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1379 END \name
   1380 .endm
   1381 
   1382 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
   1383 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
   1384 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
   1385 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1
   1386 
   1387 
   1388 // The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
   1389 // and art_quick_alloc_array_resolved/initialized_region_tlab.
   1390 //
    1391 // r0: type, r1: component_count, r2: total_size, r9: Thread::Current; r3, r12: free.
    1392 // Need to preserve r0 and r1 for the slow path.
   1393 .macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
   1394     and    r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED             // Apply alignment mask
   1395                                                               // (addr + 7) & ~7.
   1396 
   1397                                                               // Load thread_local_pos (r3) and
   1398                                                               // thread_local_end (r12) with ldrd.
   1399                                                               // Check constraints for ldrd.
   1400 #if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
    1401 #error "Thread::thread_local_pos/end must be consecutive and 8-byte aligned for performance"
   1402 #endif
   1403     ldrd   r3, r12, [r9, #THREAD_LOCAL_POS_OFFSET]
   1404     sub    r12, r12, r3                                       // Compute the remaining buf size.
   1405     cmp    r2, r12                                            // Check if the total_size fits.
    1406     // The array class is always initialized here. Unlike new-instance, the
    1407     // size check does not double as a class-initialization check.
   1408     bhi    \slowPathLabel
   1409     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
   1410     add    r2, r2, r3
   1411     str    r2, [r9, #THREAD_LOCAL_POS_OFFSET]                 // Store new thread_local_pos.
   1412     ldr    r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]             // Increment thread_local_objects.
   1413     add    r2, r2, #1
   1414     str    r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
   1415     POISON_HEAP_REF r0
   1416     str    r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
   1417     str    r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]              // Store the array length.
   1418                                                               // Fence. This is "ish" not "ishst" so
   1419                                                               // that the code after this allocation
   1420                                                               // site will see the right values in
   1421                                                               // the fields of the class.
   1422     mov    r0, r3
   1423 // new-array is special. The class is loaded and immediately goes to the Initialized state
   1424 // before it is published. Therefore the only fence needed is for the publication of the object.
   1425 // See ClassLinker::CreateArrayClass() for more details.
   1426 
   1427 // For publication of the new array, we don't need a 'dmb ishst' here.
   1428 // The compiler generates 'dmb ishst' for all new-array insts.
   1429     bx     lr
   1430 .endm
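// The array fast path mirrors the object one, with the length stored next to
// the class pointer; a C-like sketch (names are illustrative):
//
//   void* AllocArrayTlabFastPath(Class* cls, int32_t count, uint32_t total_size,
//                                Thread* self) {
//     total_size &= ~OBJECT_ALIGNMENT_MASK;  // size_setup already added the mask
//     if (total_size > (uint32_t)(self->tlab_end - self->tlab_pos))
//       return SlowPath(cls, count, self);
//     Array* arr = (Array*)self->tlab_pos;
//     self->tlab_pos += total_size;
//     self->tlab_objects++;
//     arr->klass = cls;      // publish the class pointer...
//     arr->length = count;   // ...and the array length
//     return arr;
//   }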
   1431 
   1432 .macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
   1433 ENTRY \name
   1434     // Fast path array allocation for region tlab allocation.
   1435     // r0: mirror::Class* type
   1436     // r1: int32_t component_count
   1437     // r9: thread
   1438     // r2, r3, r12: free.
   1439     \size_setup .Lslow_path\name
   1440     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
   1441 .Lslow_path\name:
   1442     // r0: mirror::Class* klass
   1443     // r1: int32_t component_count
   1444     // r2: Thread* self
   1445     SETUP_SAVE_REFS_ONLY_FRAME r2  // save callee saves in case of GC
   1446     mov    r2, r9                  // pass Thread::Current
   1447     bl     \entrypoint
   1448     RESTORE_SAVE_REFS_ONLY_FRAME
   1449     REFRESH_MARKING_REGISTER
   1450     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1451 END \name
   1452 .endm
   1453 
   1454 .macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
   1455     bkpt                                                    // We should never enter here.
   1456                                                             // Code below is for reference.
   1457                                                             // Possibly a large object, go slow.
   1458                                                             // Also does negative array size check.
   1459     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
   1460     cmp r1, r2
   1461     bhi \slow_path
   1462                                                             // Array classes are never finalizable
   1463                                                             // or uninitialized, no need to check.
   1464     ldr    r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]    // Load component type
   1465     UNPOISON_HEAP_REF r3
   1466     ldr    r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
   1467     lsr    r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT         // Component size shift is in high 16
   1468                                                             // bits.
   1469     lsl    r2, r1, r3                                       // Calculate data size
   1470                                                             // Add array data offset and alignment.
   1471     add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1472 #if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
   1473 #error Long array data offset must be 4 greater than int array data offset.
   1474 #endif
   1475 
    1476     add    r3, r3, #1                                       // Add 4 to the size only if the
   1477                                                             // component size shift is 3
   1478                                                             // (for 64 bit alignment).
   1479     and    r3, r3, #4
   1480     add    r2, r2, r3
   1481 .endm
   1482 
   1483 .macro COMPUTE_ARRAY_SIZE_8 slow_path
   1484     // Possibly a large object, go slow.
   1485     // Also does negative array size check.
   1486     movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
   1487     cmp r1, r2
   1488     bhi \slow_path
   1489     // Add array data offset and alignment.
   1490     add    r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1491 .endm
   1492 
   1493 .macro COMPUTE_ARRAY_SIZE_16 slow_path
   1494     // Possibly a large object, go slow.
   1495     // Also does negative array size check.
   1496     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
   1497     cmp r1, r2
   1498     bhi \slow_path
   1499     lsl    r2, r1, #1
   1500     // Add array data offset and alignment.
   1501     add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1502 .endm
   1503 
   1504 .macro COMPUTE_ARRAY_SIZE_32 slow_path
   1505     // Possibly a large object, go slow.
   1506     // Also does negative array size check.
   1507     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
   1508     cmp r1, r2
   1509     bhi \slow_path
   1510     lsl    r2, r1, #2
   1511     // Add array data offset and alignment.
   1512     add    r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1513 .endm
   1514 
   1515 .macro COMPUTE_ARRAY_SIZE_64 slow_path
   1516     // Possibly a large object, go slow.
   1517     // Also does negative array size check.
   1518     movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
   1519     cmp r1, r2
   1520     bhi \slow_path
   1521     lsl    r2, r1, #3
   1522     // Add array data offset and alignment.
   1523     add    r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1524 .endm
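// Each size_setup above computes the byte size the same way; in C terms, for
// the fixed-width variants (the alignment mask added here is cleared again in
// ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE):
//
//   // 'limit' also rejects negative counts: viewed as unsigned they are huge.
//   uint32_t limit = (MIN_LARGE_OBJECT_THRESHOLD - data_offset) >> shift;
//   if (count > limit) goto slow_path;  // possibly a large object, go slow
//   uint32_t total_size = (count << shift) + data_offset + OBJECT_ALIGNMENT_MASK;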
   1525 
   1526 // TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
   1527 // the entrypoint once all backends have been updated to use the size variants.
   1528 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1529 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
   1530 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
   1531 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
   1532 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
   1533 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1534 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
   1535 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
   1536 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
   1537 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
   1538 
   1539     /*
   1540      * Called by managed code when the value in rSUSPEND has been decremented to 0.
   1541      */
   1542     .extern artTestSuspendFromCode
   1543 ENTRY art_quick_test_suspend
   1544     SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET @ save everything for GC stack crawl
   1545     mov    r0, rSELF
   1546     bl     artTestSuspendFromCode               @ (Thread*)
   1547     RESTORE_SAVE_EVERYTHING_FRAME
   1548     REFRESH_MARKING_REGISTER
   1549     bx     lr
   1550 END art_quick_test_suspend
   1551 
   1552 ENTRY art_quick_implicit_suspend
   1553     mov    r0, rSELF
   1554     SETUP_SAVE_REFS_ONLY_FRAME r1             @ save callee saves for stack crawl
   1555     bl     artTestSuspendFromCode             @ (Thread*)
   1556     RESTORE_SAVE_REFS_ONLY_FRAME
   1557     REFRESH_MARKING_REGISTER
   1558     bx     lr
   1559 END art_quick_implicit_suspend
   1560 
   1561     /*
   1562      * Called by managed code that is attempting to call a method on a proxy class. On entry
   1563      * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
   1564      * frame size of the invoked proxy method agrees with a ref and args callee save frame.
   1565      */
   1566      .extern artQuickProxyInvokeHandler
   1567 ENTRY art_quick_proxy_invoke_handler
   1568     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
   1569     mov     r2, r9                 @ pass Thread::Current
   1570     mov     r3, sp                 @ pass SP
   1571     blx     artQuickProxyInvokeHandler  @ (Method* proxy method, receiver, Thread*, SP)
   1572     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1573     // Tear down the callee-save frame. Skip arg registers.
   1574     add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1575     .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1576     RESTORE_SAVE_REFS_ONLY_FRAME
   1577     REFRESH_MARKING_REGISTER
    1578     cbnz    r2, 1f                 @ deliver exception if one is pending
   1579     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
   1580     bx      lr                     @ return on success
   1581 1:
   1582     DELIVER_PENDING_EXCEPTION
   1583 END art_quick_proxy_invoke_handler
   1584 
   1585     /*
   1586      * Called to resolve an imt conflict.
   1587      * r0 is the conflict ArtMethod.
   1588      * r12 is a hidden argument that holds the target interface method's dex method index.
   1589      *
   1590      * Note that this stub writes to r0, r4, and r12.
   1591      */
   1592     .extern artLookupResolvedMethod
   1593 ENTRY art_quick_imt_conflict_trampoline
   1594     push    {r1-r2}
   1595     .cfi_adjust_cfa_offset (2 * 4)
   1596     .cfi_rel_offset r1, 0
   1597     .cfi_rel_offset r2, 4
   1598     ldr     r4, [sp, #(2 * 4)]  // Load referrer.
   1599     ldr     r2, [r0, #ART_METHOD_JNI_OFFSET_32]  // Load ImtConflictTable
   1600     // Load the declaring class (without read barrier) and access flags (for obsolete method check).
    1601     // The obsolete flag is set while all threads are suspended, so we do not need an acquire operation here.
   1602 #if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
   1603 #error "Expecting declaring class and access flags to be consecutive for LDRD."
   1604 #endif
   1605     ldrd    r0, r1, [r4, #ART_METHOD_DECLARING_CLASS_OFFSET]
   1606     // If the method is obsolete, just go through the dex cache miss slow path.
   1607     lsrs    r1, #(ACC_OBSOLETE_METHOD_SHIFT + 1)
   1608     bcs     .Limt_conflict_trampoline_dex_cache_miss
   1609     ldr     r4, [r0, #MIRROR_CLASS_DEX_CACHE_OFFSET]  // Load the DexCache (without read barrier).
   1610     UNPOISON_HEAP_REF r4
   1611     ubfx    r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS  // Calculate DexCache method slot index.
   1612     ldr     r4, [r4, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
   1613     add     r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.
   1614 
   1615 // FIXME: Configure the build to use the faster code when appropriate.
   1616 //        Currently we fall back to the slower version.
   1617 #if HAS_ATOMIC_LDRD
   1618     ldrd    r0, r1, [r4]
   1619 #else
   1620     push    {r3}
   1621     .cfi_adjust_cfa_offset 4
   1622     .cfi_rel_offset r3, 0
   1623 .Limt_conflict_trampoline_retry_load:
   1624     ldrexd  r0, r1, [r4]
   1625     strexd  r3, r0, r1, [r4]
   1626     cmp     r3, #0
   1627     bne     .Limt_conflict_trampoline_retry_load
   1628     pop     {r3}
   1629     .cfi_adjust_cfa_offset -4
   1630     .cfi_restore r3
   1631 #endif
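    // In C11 terms, the LDREXD/STREXD retry loop above is just an atomic
    // 64-bit load of the DexCache slot (a sketch only; the runtime does not
    // actually call the C11 API here):
    //   uint64_t entry = atomic_load_explicit((_Atomic uint64_t*)slot,
    //                                         memory_order_relaxed);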
   1632 
   1633     ldr     r4, [r2]  // Load first entry in ImtConflictTable.
   1634     cmp     r1, r12   // Compare method index to see if we had a DexCache method hit.
   1635     bne     .Limt_conflict_trampoline_dex_cache_miss
   1636 .Limt_table_iterate:
   1637     cmp     r4, r0
   1638     // Branch if found. Benchmarks have shown doing a branch here is better.
   1639     beq     .Limt_table_found
   1640     // If the entry is null, the interface method is not in the ImtConflictTable.
   1641     cbz     r4, .Lconflict_trampoline
   1642     // Iterate over the entries of the ImtConflictTable.
   1643     ldr     r4, [r2, #(2 * __SIZEOF_POINTER__)]!
   1644     b .Limt_table_iterate
   1645 .Limt_table_found:
   1646     // We successfully hit an entry in the table. Load the target method
   1647     // and jump to it.
   1648     ldr     r0, [r2, #__SIZEOF_POINTER__]
   1649     .cfi_remember_state
   1650     pop     {r1-r2}
   1651     .cfi_adjust_cfa_offset -(2 * 4)
   1652     .cfi_restore r1
   1653     .cfi_restore r2
   1654     ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
   1655     .cfi_restore_state
   1656 .Lconflict_trampoline:
   1657     // Call the runtime stub to populate the ImtConflictTable and jump to the
   1658     // resolved method.
   1659     .cfi_remember_state
   1660     pop     {r1-r2}
   1661     .cfi_adjust_cfa_offset -(2 * 4)
   1662     .cfi_restore r1
   1663     .cfi_restore r2
   1664     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
   1665     .cfi_restore_state
   1666 .Limt_conflict_trampoline_dex_cache_miss:
    1667     // We're not creating a proper runtime method frame here, as
    1668     // artLookupResolvedMethod() is not allowed to walk the stack.
   1669 
   1670     // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
   1671     push    {r2-r4, lr}
   1672     .cfi_adjust_cfa_offset (4 * 4)
   1673     .cfi_rel_offset r3, 4
   1674     .cfi_rel_offset lr, 12
   1675     // Save FPR args.
   1676     vpush   {d0-d7}
   1677     .cfi_adjust_cfa_offset (8 * 8)
   1678 
   1679     mov     r0, ip                      // Pass method index.
   1680     ldr     r1, [sp, #(8 * 8 + 6 * 4)]  // Pass referrer.
   1681     bl      artLookupResolvedMethod     // (uint32_t method_index, ArtMethod* referrer)
   1682 
   1683     // Restore FPR args.
   1684     vpop    {d0-d7}
   1685     .cfi_adjust_cfa_offset -(8 * 8)
   1686     // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
   1687     pop     {r2-r4, lr}
   1688     .cfi_adjust_cfa_offset -(4 * 4)
   1689     .cfi_restore r3
   1690     .cfi_restore lr
   1691 
   1692     cmp     r0, #0                  // If the method wasn't resolved,
   1693     beq     .Lconflict_trampoline   //   skip the lookup and go to artInvokeInterfaceTrampoline().
   1694     b       .Limt_table_iterate
   1695 END art_quick_imt_conflict_trampoline
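// The table walk above is a linear scan over null-terminated pairs of
// (interface method, implementation) pointers; a C-like sketch (names are
// illustrative):
//
//   ArtMethod* LookupImtConflictTable(void** table, ArtMethod* interface_method) {
//     for (void** entry = table; entry[0] != NULL; entry += 2) {
//       if (entry[0] == interface_method) return (ArtMethod*)entry[1];
//     }
//     return NULL;  // not in the table: populate it via the runtime stub
//   }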
   1696 
   1697     .extern artQuickResolutionTrampoline
   1698 ENTRY art_quick_resolution_trampoline
   1699     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
   1700     mov     r2, r9                 @ pass Thread::Current
   1701     mov     r3, sp                 @ pass SP
   1702     blx     artQuickResolutionTrampoline  @ (Method* called, receiver, Thread*, SP)
   1703     cbz     r0, 1f                 @ is code pointer null? goto exception
   1704     mov     r12, r0
   1705     ldr     r0, [sp, #0]           @ load resolved method in r0
   1706     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1707     REFRESH_MARKING_REGISTER
   1708     bx      r12                    @ tail-call into actual code
   1709 1:
   1710     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1711     DELIVER_PENDING_EXCEPTION
   1712 END art_quick_resolution_trampoline
   1713 
   1714     /*
   1715      * Called to do a generic JNI down-call
   1716      */
   1717 ENTRY art_quick_generic_jni_trampoline
   1718     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
   1719 
   1720     // Save rSELF
   1721     mov r11, rSELF
    1722     // Save SP, so we can have static CFI info. r10 is saved in ref_and_args.
   1723     mov r10, sp
   1724     .cfi_def_cfa_register r10
   1725 
   1726     sub sp, sp, #5120
   1727 
   1728     // prepare for artQuickGenericJniTrampoline call
   1729     // (Thread*,  SP)
   1730     //    r0      r1   <= C calling convention
   1731     //  rSELF     r10  <= where they are
   1732 
   1733     mov r0, rSELF   // Thread*
   1734     mov r1, r10
   1735     blx artQuickGenericJniTrampoline  // (Thread*, sp)
   1736 
   1737     // The C call will have registered the complete save-frame on success.
   1738     // The result of the call is:
   1739     // r0: pointer to native code, 0 on error.
   1740     // r1: pointer to the bottom of the used area of the alloca, can restore stack till there.
   1741 
   1742     // Check for error = 0.
   1743     cbz r0, .Lexception_in_native
   1744 
   1745     // Release part of the alloca.
   1746     mov sp, r1
   1747 
   1748     // Save the code pointer
   1749     mov r12, r0
   1750 
   1751     // Load parameters from frame into registers.
   1752     pop {r0-r3}
   1753 
   1754     // Softfloat.
   1755     // TODO: Change to hardfloat when supported.
   1756 
   1757     blx r12           // native call.
   1758 
   1759     // result sign extension is handled in C code
   1760     // prepare for artQuickGenericJniEndTrampoline call
   1761     // (Thread*, result, result_f)
   1762     //    r0      r2,r3    stack       <= C calling convention
   1763     //    r11     r0,r1    r0,r1          <= where they are
   1764     sub sp, sp, #8 // Stack alignment.
   1765 
   1766     push {r0-r1}
   1767     mov r3, r1
   1768     mov r2, r0
   1769     mov r0, r11
   1770 
   1771     blx artQuickGenericJniEndTrampoline
   1772 
   1773     // Restore self pointer.
   1774     mov r9, r11
   1775 
   1776     // Pending exceptions possible.
   1777     ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1778     cbnz r2, .Lexception_in_native
   1779 
   1780     // Tear down the alloca.
   1781     mov sp, r10
   1782     .cfi_def_cfa_register sp
   1783 
   1784     // Tear down the callee-save frame. Skip arg registers.
   1785     add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
   1786     .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
   1787     RESTORE_SAVE_REFS_ONLY_FRAME
   1788     REFRESH_MARKING_REGISTER
   1789 
   1790     // store into fpr, for when it's a fpr return...
   1791     vmov d0, r0, r1
   1792     bx lr      // ret
   1793     // Undo the unwinding information from above since it doesn't apply below.
   1794     .cfi_def_cfa_register r10
   1795     .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
   1796 
   1797 .Lexception_in_native:
   1798     ldr ip, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
   1799     add ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
   1800     mov sp, ip
   1801     .cfi_def_cfa_register sp
    1802     @ This will create a new save-all frame, required by the runtime.
   1803     DELIVER_PENDING_EXCEPTION
   1804 END art_quick_generic_jni_trampoline
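// A sketch of the convention documented above: artQuickGenericJniTrampoline
// effectively returns two words in r0/r1, as if its result were
//
//   struct GenericJniResult {
//     void* native_code;  // r0: pointer to native code, NULL on error
//     void* new_sp;       // r1: bottom of the used area of the alloca
//   };
//
// (Illustrative only; the real declaration lives in the runtime.)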
   1805 
   1806     .extern artQuickToInterpreterBridge
   1807 ENTRY art_quick_to_interpreter_bridge
   1808     SETUP_SAVE_REFS_AND_ARGS_FRAME r1
   1809     mov     r1, r9                 @ pass Thread::Current
   1810     mov     r2, sp                 @ pass SP
   1811     blx     artQuickToInterpreterBridge    @ (Method* method, Thread*, SP)
   1812     ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
   1813     // Tear down the callee-save frame. Skip arg registers.
   1814     add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1815     .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
   1816     RESTORE_SAVE_REFS_ONLY_FRAME
   1817     REFRESH_MARKING_REGISTER
    1818     cbnz    r2, 1f                 @ deliver exception if one is pending
   1819     vmov    d0, r0, r1             @ store into fpr, for when it's a fpr return...
   1820     bx      lr                     @ return on success
   1821 1:
   1822     DELIVER_PENDING_EXCEPTION
   1823 END art_quick_to_interpreter_bridge
   1824 
   1825 /*
   1826  * Called to attempt to execute an obsolete method.
   1827  */
   1828 ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
   1829 
   1830     /*
   1831      * Routine that intercepts method calls and returns.
   1832      */
   1833     .extern artInstrumentationMethodEntryFromCode
   1834     .extern artInstrumentationMethodExitFromCode
   1835 ENTRY art_quick_instrumentation_entry
   1836     @ Make stack crawlable and clobber r2 and r3 (post saving)
   1837     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
   1838     @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
   1839     str   r0, [sp, #4]
   1840     mov   r2, r9         @ pass Thread::Current
   1841     mov   r3, sp         @ pass SP
   1842     blx   artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
   1843     cbz   r0, .Ldeliver_instrumentation_entry_exception
   1844                          @ Deliver exception if we got nullptr as function.
   1845     mov   r12, r0        @ r12 holds reference to code
   1846     ldr   r0, [sp, #4]   @ restore r0
   1847     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1848     adr   lr, art_quick_instrumentation_exit + /* thumb mode */ 1
   1849                          @ load art_quick_instrumentation_exit into lr in thumb mode
   1850     REFRESH_MARKING_REGISTER
   1851     bx    r12            @ call method with lr set to art_quick_instrumentation_exit
   1852 .Ldeliver_instrumentation_entry_exception:
   1853     @ Deliver exception for art_quick_instrumentation_entry placed after
   1854     @ art_quick_instrumentation_exit so that the fallthrough works.
   1855     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1856     DELIVER_PENDING_EXCEPTION
   1857 END art_quick_instrumentation_entry
   1858 
   1859 ENTRY art_quick_instrumentation_exit
    1860     mov   lr, #0         @ the link register points here, so clobber it with 0 for later checks
   1861     SETUP_SAVE_EVERYTHING_FRAME r2
   1862 
   1863     add   r3, sp, #8     @ store fpr_res pointer, in kSaveEverything frame
   1864     add   r2, sp, #136   @ store gpr_res pointer, in kSaveEverything frame
   1865     mov   r1, sp         @ pass SP
   1866     mov   r0, r9         @ pass Thread::Current
   1867     blx   artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)
   1868 
   1869     cbz   r0, .Ldo_deliver_instrumentation_exception
   1870                          @ Deliver exception if we got nullptr as function.
   1871     cbnz  r1, .Ldeoptimize
   1872     // Normal return.
   1873     str   r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
   1874                          @ Set return pc.
   1875     RESTORE_SAVE_EVERYTHING_FRAME
   1876     REFRESH_MARKING_REGISTER
   1877     bx lr
   1878 .Ldo_deliver_instrumentation_exception:
   1879     DELIVER_PENDING_EXCEPTION_FRAME_READY
   1880 .Ldeoptimize:
   1881     str   r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
   1882                          @ Set return pc.
   1883     RESTORE_SAVE_EVERYTHING_FRAME
   1884     // Jump to art_quick_deoptimize.
   1885     b     art_quick_deoptimize
   1886 END art_quick_instrumentation_exit
   1887 
   1888     /*
   1889      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
   1890      * will long jump to the upcall with a special exception of -1.
   1891      */
   1892     .extern artDeoptimize
   1893 ENTRY art_quick_deoptimize
   1894     SETUP_SAVE_EVERYTHING_FRAME r0
   1895     mov    r0, r9         @ pass Thread::Current
   1896     blx    artDeoptimize  @ (Thread*)
   1897 END art_quick_deoptimize
   1898 
   1899     /*
   1900      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
   1901      * will long jump to the interpreter bridge.
   1902      */
   1903     .extern artDeoptimizeFromCompiledCode
   1904 ENTRY art_quick_deoptimize_from_compiled_code
   1905     SETUP_SAVE_EVERYTHING_FRAME r1
   1906     mov    r1, r9                         @ pass Thread::Current
   1907     blx    artDeoptimizeFromCompiledCode  @ (DeoptimizationKind, Thread*)
   1908 END art_quick_deoptimize_from_compiled_code
   1909 
   1910     /*
   1911      * Signed 64-bit integer multiply.
   1912      *
   1913      * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
   1914      *        WX
   1915      *      x YZ
   1916      *  --------
   1917      *     ZW ZX
   1918      *  YW YX
   1919      *
   1920      * The low word of the result holds ZX, the high word holds
   1921      * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
   1922      * it doesn't fit in the low 64 bits.
   1923      *
   1924      * Unlike most ARM math operations, multiply instructions have
   1925      * restrictions on using the same register more than once (Rd and Rm
   1926      * cannot be the same).
   1927      */
   1928     /* mul-long vAA, vBB, vCC */
   1929 ENTRY art_quick_mul_long
   1930     push    {r9-r10}
   1931     .cfi_adjust_cfa_offset 8
   1932     .cfi_rel_offset r9, 0
   1933     .cfi_rel_offset r10, 4
   1934     mul     ip, r2, r1                  @  ip<- ZxW
   1935     umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
   1936     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
   1937     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
    1938     mov     r0, r9
    1939     mov     r1, r10
   1940     pop     {r9-r10}
   1941     .cfi_adjust_cfa_offset -8
   1942     .cfi_restore r9
   1943     .cfi_restore r10
   1944     bx      lr
   1945 END art_quick_mul_long
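// The same computation in C, following the WX x YZ diagram above (a sketch;
// the function name is illustrative):
//
//   uint64_t MulLong(uint32_t x, uint32_t w, uint32_t z, uint32_t y) {
//     // Inputs: first operand w:x (hi:lo), second operand y:z (hi:lo).
//     uint64_t zx = (uint64_t)z * x;                       // full low product
//     uint32_t hi = (uint32_t)(zx >> 32) + z * w + y * x;  // ZW and YX fold into the high word
//     return ((uint64_t)hi << 32) | (uint32_t)zx;
//   }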
   1946 
   1947     /*
   1948      * Long integer shift.  This is different from the generic 32/64-bit
   1949      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1950      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1951      * 6 bits.
   1952      * On entry:
   1953      *   r0: low word
   1954      *   r1: high word
   1955      *   r2: shift count
   1956      */
   1957     /* shl-long vAA, vBB, vCC */
   1958 ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
   1959     and     r2, r2, #63                 @ r2<- r2 & 0x3f
   1960     mov     r1, r1, asl r2              @  r1<- r1 << r2
   1961     rsb     r3, r2, #32                 @  r3<- 32 - r2
    1962     orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >> (32-r2))
   1963     subs    ip, r2, #32                 @  ip<- r2 - 32
   1964     movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
   1965     mov     r0, r0, asl r2              @  r0<- r0 << r2
   1966     bx      lr
   1967 END art_quick_shl_long
   1968 
   1969     /*
   1970      * Long integer shift.  This is different from the generic 32/64-bit
   1971      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1972      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1973      * 6 bits.
   1974      * On entry:
   1975      *   r0: low word
   1976      *   r1: high word
   1977      *   r2: shift count
   1978      */
   1979     /* shr-long vAA, vBB, vCC */
   1980 ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
    1981     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    1982     mov     r0, r0, lsr r2              @  r0<- r0 >> r2
   1983     rsb     r3, r2, #32                 @  r3<- 32 - r2
   1984     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
   1985     subs    ip, r2, #32                 @  ip<- r2 - 32
   1986     movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
   1987     mov     r1, r1, asr r2              @  r1<- r1 >> r2
   1988     bx      lr
   1989 END art_quick_shr_long
   1990 
   1991     /*
   1992      * Long integer shift.  This is different from the generic 32/64-bit
   1993      * binary operations because vAA/vBB are 64-bit but vCC (the shift
   1994      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
   1995      * 6 bits.
   1996      * On entry:
   1997      *   r0: low word
   1998      *   r1: high word
   1999      *   r2: shift count
   2000      */
   2001     /* ushr-long vAA, vBB, vCC */
   2002 ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
    2003     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    2004     mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
   2005     rsb     r3, r2, #32                 @  r3<- 32 - r2
   2006     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
   2007     subs    ip, r2, #32                 @  ip<- r2 - 32
   2008     movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
   2009     mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
   2010     bx      lr
   2011 END art_quick_ushr_long
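// All three shift routines compose the 64-bit shift from 32-bit operations;
// shl-long in C (shr/ushr are the mirror images), written to avoid the
// shift-by->=32 undefined behavior that the assembly can ignore thanks to
// ARM's register-shift semantics:
//
//   uint64_t ShlLong(uint32_t lo, uint32_t hi, uint32_t count) {
//     uint32_t n = count & 63;  // Dalvik ignores all but the low 6 bits
//     uint32_t out_lo, out_hi;
//     if (n == 0)      { out_lo = lo;      out_hi = hi; }
//     else if (n < 32) { out_lo = lo << n; out_hi = (hi << n) | (lo >> (32 - n)); }
//     else             { out_lo = 0;       out_hi = lo << (n - 32); }
//     return ((uint64_t)out_hi << 32) | out_lo;
//   }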
   2012 
   2013     /*
   2014      * String's indexOf.
   2015      *
   2016      * On entry:
   2017      *    r0:   string object (known non-null)
   2018      *    r1:   char to match (known <= 0xFFFF)
   2019      *    r2:   Starting offset in string data
   2020      */
   2021 ENTRY art_quick_indexof
   2022     push {r4, r10-r11, lr} @ 4 words of callee saves
   2023     .cfi_adjust_cfa_offset 16
   2024     .cfi_rel_offset r4, 0
   2025     .cfi_rel_offset r10, 4
   2026     .cfi_rel_offset r11, 8
   2027     .cfi_rel_offset lr, 12
   2028 #if (STRING_COMPRESSION_FEATURE)
   2029     ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
   2030 #else
   2031     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
   2032 #endif
   2033     add   r0, #MIRROR_STRING_VALUE_OFFSET
   2034 #if (STRING_COMPRESSION_FEATURE)
    2035     /* r4 holds the count (with compression flag); r3 holds the actual length */
   2036     lsr   r3, r4, #1
   2037 #endif
   2038     /* Clamp start to [0..count] */
   2039     cmp   r2, #0
   2040     it    lt
   2041     movlt r2, #0
   2042     cmp   r2, r3
   2043     it    gt
   2044     movgt r2, r3
   2045 
   2046     /* Save a copy in r12 to later compute result */
   2047     mov   r12, r0
   2048 
   2049     /* Build pointer to start of data to compare and pre-bias */
   2050 #if (STRING_COMPRESSION_FEATURE)
   2051     lsrs  r4, r4, #1
   2052     bcc   .Lstring_indexof_compressed
   2053 #endif
   2054     add   r0, r0, r2, lsl #1
   2055     sub   r0, #2
   2056 
   2057     /* Compute iteration count */
   2058     sub   r2, r3, r2
   2059 
   2060     /*
   2061      * At this point we have:
   2062      *   r0: start of data to test
   2063      *   r1: char to compare
   2064      *   r2: iteration count
   2065      *   r4: compression style (used temporarily)
   2066      *   r12: original start of string data
   2067      *   r3, r4, r10, r11 available for loading string data
   2068      */
   2069 
   2070     subs  r2, #4
   2071     blt   .Lindexof_remainder
   2072 
   2073 .Lindexof_loop4:
   2074     ldrh  r3, [r0, #2]!
   2075     ldrh  r4, [r0, #2]!
   2076     ldrh  r10, [r0, #2]!
   2077     ldrh  r11, [r0, #2]!
   2078     cmp   r3, r1
   2079     beq   .Lmatch_0
   2080     cmp   r4, r1
   2081     beq   .Lmatch_1
   2082     cmp   r10, r1
   2083     beq   .Lmatch_2
   2084     cmp   r11, r1
   2085     beq   .Lmatch_3
   2086     subs  r2, #4
   2087     bge   .Lindexof_loop4
   2088 
   2089 .Lindexof_remainder:
   2090     adds  r2, #4
   2091     beq   .Lindexof_nomatch
   2092 
   2093 .Lindexof_loop1:
   2094     ldrh  r3, [r0, #2]!
   2095     cmp   r3, r1
   2096     beq   .Lmatch_3
   2097     subs  r2, #1
   2098     bne   .Lindexof_loop1
   2099 
   2100 .Lindexof_nomatch:
   2101     mov   r0, #-1
   2102     pop {r4, r10-r11, pc}
   2103 
   2104 .Lmatch_0:
   2105     sub   r0, #6
   2106     sub   r0, r12
   2107     asr   r0, r0, #1
   2108     pop {r4, r10-r11, pc}
   2109 .Lmatch_1:
   2110     sub   r0, #4
   2111     sub   r0, r12
   2112     asr   r0, r0, #1
   2113     pop {r4, r10-r11, pc}
   2114 .Lmatch_2:
   2115     sub   r0, #2
   2116     sub   r0, r12
   2117     asr   r0, r0, #1
   2118     pop {r4, r10-r11, pc}
   2119 .Lmatch_3:
   2120     sub   r0, r12
   2121     asr   r0, r0, #1
   2122     pop {r4, r10-r11, pc}
   2123 #if (STRING_COMPRESSION_FEATURE)
   2124 .Lstring_indexof_compressed:
   2125     add   r0, r0, r2
   2126     sub   r0, #1
   2127     sub   r2, r3, r2
   2128 .Lstring_indexof_compressed_loop:
   2129     subs  r2, #1
   2130     blt   .Lindexof_nomatch
   2131     ldrb  r3, [r0, #1]!
   2132     cmp   r3, r1
   2133     beq   .Lstring_indexof_compressed_matched
   2134     b     .Lstring_indexof_compressed_loop
   2135 .Lstring_indexof_compressed_matched:
   2136     sub   r0, r12
   2137     pop {r4, r10-r11, pc}
   2138 #endif
   2139 END art_quick_indexof
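// In C terms the routine above performs the following scan (uncompressed
// case; the assembly additionally unrolls the main loop four characters at a
// time):
//
//   int32_t IndexOf(const uint16_t* chars, int32_t count, uint16_t ch,
//                   int32_t start) {
//     if (start < 0) start = 0;  // clamp start to [0..count]
//     if (start > count) start = count;
//     for (int32_t i = start; i < count; ++i) {
//       if (chars[i] == ch) return i;
//     }
//     return -1;
//   }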
   2140 
   2141     /* Assembly routines used to handle ABI differences. */
   2142 
   2143     /* double fmod(double a, double b) */
   2144     .extern fmod
   2145 ENTRY art_quick_fmod
   2146     push  {lr}
   2147     .cfi_adjust_cfa_offset 4
   2148     .cfi_rel_offset lr, 0
   2149     sub   sp, #4
   2150     .cfi_adjust_cfa_offset 4
   2151     vmov  r0, r1, d0
   2152     vmov  r2, r3, d1
   2153     bl    fmod
   2154     vmov  d0, r0, r1
   2155     add   sp, #4
   2156     .cfi_adjust_cfa_offset -4
   2157     pop   {pc}
   2158 END art_quick_fmod
   2159 
   2160     /* float fmodf(float a, float b) */
   2161      .extern fmodf
   2162 ENTRY art_quick_fmodf
   2163     push  {lr}
   2164     .cfi_adjust_cfa_offset 4
   2165     .cfi_rel_offset lr, 0
   2166     sub   sp, #4
   2167     .cfi_adjust_cfa_offset 4
   2168     vmov  r0, r1, d0
   2169     bl    fmodf
   2170     vmov  s0, r0
   2171     add   sp, #4
   2172     .cfi_adjust_cfa_offset -4
   2173     pop   {pc}
   2174 END art_quick_fmodf
   2175 
   2176     /* int64_t art_d2l(double d) */
   2177     .extern art_d2l
   2178 ENTRY art_quick_d2l
   2179     vmov  r0, r1, d0
   2180     b     art_d2l
   2181 END art_quick_d2l
   2182 
   2183     /* int64_t art_f2l(float f) */
   2184     .extern art_f2l
   2185 ENTRY art_quick_f2l
   2186     vmov  r0, s0
   2187     b     art_f2l
   2188 END art_quick_f2l
   2189 
   2190     /* float art_l2f(int64_t l) */
   2191     .extern art_l2f
   2192 ENTRY art_quick_l2f
   2193     push  {lr}
   2194     .cfi_adjust_cfa_offset 4
   2195     .cfi_rel_offset lr, 0
   2196     sub   sp, #4
   2197     .cfi_adjust_cfa_offset 4
   2198     bl    art_l2f
   2199     vmov  s0, r0
   2200     add   sp, #4
   2201     .cfi_adjust_cfa_offset -4
   2202     pop   {pc}
   2203 END art_quick_l2f
   2204 
   2205 .macro CONDITIONAL_CBZ reg, reg_if, dest
   2206 .ifc \reg, \reg_if
   2207     cbz \reg, \dest
   2208 .endif
   2209 .endm
   2210 
   2211 .macro CONDITIONAL_CMPBZ reg, reg_if, dest
   2212 .ifc \reg, \reg_if
   2213     cmp \reg, #0
   2214     beq \dest
   2215 .endif
   2216 .endm
   2217 
    2218 // Use CBZ if the register is in {r0-r7} (CBZ can only encode low registers); otherwise compare and branch.
   2219 .macro SMART_CBZ reg, dest
   2220     CONDITIONAL_CBZ \reg, r0, \dest
   2221     CONDITIONAL_CBZ \reg, r1, \dest
   2222     CONDITIONAL_CBZ \reg, r2, \dest
   2223     CONDITIONAL_CBZ \reg, r3, \dest
   2224     CONDITIONAL_CBZ \reg, r4, \dest
   2225     CONDITIONAL_CBZ \reg, r5, \dest
   2226     CONDITIONAL_CBZ \reg, r6, \dest
   2227     CONDITIONAL_CBZ \reg, r7, \dest
   2228     CONDITIONAL_CMPBZ \reg, r8, \dest
   2229     CONDITIONAL_CMPBZ \reg, r9, \dest
   2230     CONDITIONAL_CMPBZ \reg, r10, \dest
   2231     CONDITIONAL_CMPBZ \reg, r11, \dest
   2232     CONDITIONAL_CMPBZ \reg, r12, \dest
   2233     CONDITIONAL_CMPBZ \reg, r13, \dest
   2234     CONDITIONAL_CMPBZ \reg, r14, \dest
   2235     CONDITIONAL_CMPBZ \reg, r15, \dest
   2236 .endm
   2237 
   2238     /*
   2239      * Create a function `name` calling the ReadBarrier::Mark routine,
   2240      * getting its argument and returning its result through register
   2241      * `reg`, saving and restoring all caller-save registers.
   2242      *
   2243      * IP is clobbered; `reg` must not be IP.
   2244      *
   2245      * If `reg` is different from `r0`, the generated function follows a
   2246      * non-standard runtime calling convention:
   2247      * - register `reg` is used to pass the (sole) argument of this
   2248      *   function (instead of R0);
   2249      * - register `reg` is used to return the result of this function
   2250      *   (instead of R0);
   2251      * - R0 is treated like a normal (non-argument) caller-save register;
   2252      * - everything else is the same as in the standard runtime calling
   2253      *   convention (e.g. standard callee-save registers are preserved).
   2254      */
   2255 .macro READ_BARRIER_MARK_REG name, reg
   2256 ENTRY \name
   2257     // Null check so that we can load the lock word.
   2258     SMART_CBZ \reg, .Lret_rb_\name
   2259     // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
   2260     ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
   2261     tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
   2262     beq .Lnot_marked_rb_\name
   2263     // Already marked, return right away.
   2264 .Lret_rb_\name:
   2265     bx lr
   2266 
   2267 .Lnot_marked_rb_\name:
   2268     // Test that both the forwarding state bits are 1.
   2269 #if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
   2270     // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
   2271     // the highest bits and the "forwarding address" state to have all bits set.
   2272 #error "Unexpected lock word state shift or forwarding address state value."
   2273 #endif
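             // For illustration: given the values checked above, the immediate below
             // is (3 << 30) == 0xc0000000, so BHS (unsigned >=) is taken exactly when
             // both of the top two bits of the lock word are set.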
   2274     cmp ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
   2275     bhs .Lret_forwarding_address\name
   2276 
   2277 .Lslow_rb_\name:
    2278     // Save IP: the kSaveEverything entrypoint art_quick_resolve_string used to
    2279     // make a tail call here. Currently, saving IP serves only to keep the stack
    2280     // aligned, but we may reintroduce kSaveEverything calls here in the future.
   2281     push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
   2282     .cfi_adjust_cfa_offset 32
   2283     .cfi_rel_offset r0, 0
   2284     .cfi_rel_offset r1, 4
   2285     .cfi_rel_offset r2, 8
   2286     .cfi_rel_offset r3, 12
   2287     .cfi_rel_offset r4, 16
   2288     .cfi_rel_offset r9, 20
   2289     .cfi_rel_offset ip, 24
   2290     .cfi_rel_offset lr, 28
   2291 
   2292     .ifnc \reg, r0
   2293       mov   r0, \reg                    @ pass arg1 - obj from `reg`
   2294     .endif
   2295 
   2296     vpush {s0-s15}                      @ save floating-point caller-save registers
   2297     .cfi_adjust_cfa_offset 64
   2298     bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
   2299     vpop {s0-s15}                       @ restore floating-point registers
   2300     .cfi_adjust_cfa_offset -64
   2301 
   2302     .ifc \reg, r0                       @ Save result to the stack slot or destination register.
   2303       str r0, [sp, #0]
   2304     .else
   2305       .ifc \reg, r1
   2306         str r0, [sp, #4]
   2307       .else
   2308         .ifc \reg, r2
   2309           str r0, [sp, #8]
   2310         .else
   2311           .ifc \reg, r3
   2312             str r0, [sp, #12]
   2313           .else
   2314             .ifc \reg, r4
   2315               str r0, [sp, #16]
   2316             .else
   2317               .ifc \reg, r9
   2318                 str r0, [sp, #20]
   2319               .else
   2320                 mov \reg, r0
   2321               .endif
   2322             .endif
   2323           .endif
   2324         .endif
   2325       .endif
   2326     .endif
   2327 
   2328     pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
   2329     .cfi_adjust_cfa_offset -32
   2330     .cfi_restore r0
   2331     .cfi_restore r1
   2332     .cfi_restore r2
   2333     .cfi_restore r3
   2334     .cfi_restore r4
   2335     .cfi_restore r9
   2336     .cfi_restore ip
   2337     .cfi_restore lr
   2338     bx lr
   2339 .Lret_forwarding_address\name:
   2340     // Shift left by the forwarding address shift. This clears out the state bits since they are
   2341     // in the top 2 bits of the lock word.
   2342     lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
   2343     bx lr
   2344 END \name
   2345 .endm
   2346 
   2347 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
   2348 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
   2349 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
   2350 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
   2351 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
   2352 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
   2353 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
   2354 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
   2355 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
   2356 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
   2357 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
   2358 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11
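
         // For example, when compiled code holds a reference to be marked in r5, it
         // calls art_quick_read_barrier_mark_reg05, which takes the reference in r5
         // and returns the possibly updated reference in r5.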
   2359 
   2360 // Helper macros for Baker CC read barrier mark introspection (BRBMI).
   2361 .macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
   2362     \macro_for_register r0
   2363     \macro_for_register r1
   2364     \macro_for_register r2
   2365     \macro_for_register r3
   2366     \macro_for_reserved_register  // R4 is reserved for the entrypoint address.
   2367     \macro_for_register r5
   2368     \macro_for_register r6
   2369     \macro_for_register r7
   2370     \macro_for_register r8
   2371     \macro_for_register r9
   2372     \macro_for_register r10
   2373     \macro_for_register r11
   2374 .endm
   2375 
   2376 .macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
   2377     BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
   2378     \macro_for_reserved_register  // IP is reserved.
   2379     \macro_for_reserved_register  // SP is reserved.
   2380     \macro_for_reserved_register  // LR is reserved.
   2381     \macro_for_reserved_register  // PC is reserved.
   2382 .endm
   2383 
   2384 .macro BRBMI_RETURN_SWITCH_CASE reg
   2385 .Lmark_introspection_return_switch_case_\reg:
   2386     mov     \reg, ip
   2387     bx      lr
   2388 .endm
   2389 
   2390 .macro BRBMI_BAD_RETURN_SWITCH_CASE
   2391 .Lmark_introspection_return_switch_case_bad:
   2392     BRBMI_BKPT_FILL_4B
   2393 .endm
   2394 
   2395 .macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
   2396     .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
   2397 .endm
   2398 
   2399 .macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
   2400     .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
   2401 .endm
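
         // For illustration: TBB reads the byte at [Rn + Rm] and branches to
         // PC + 2 * byte; the return switch below places its table right at the
         // TBB's PC, hence the (case - table) / 2 offsets above.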
   2402 
   2403 #if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
   2404 #error "Array and field introspection code sharing requires same LDR offset."
   2405 #endif
   2406 .macro BRBMI_ARRAY_LOAD index_reg
   2407     ldr     ip, [ip, \index_reg, lsl #2]                // 4 bytes.
   2408     b       art_quick_read_barrier_mark_introspection   // Should be 2 bytes, encoding T2.
   2409     .balign 8                                           // Add padding to 8 bytes.
   2410 .endm
   2411 
   2412 .macro BRBMI_BKPT_FILL_4B
   2413     bkpt    0
   2414     bkpt    0
   2415 .endm
   2416 
   2417 .macro BRBMI_BKPT_FILL_8B
   2418     BRBMI_BKPT_FILL_4B
   2419     BRBMI_BKPT_FILL_4B
   2420 .endm
   2421 
   2422 .macro BRBMI_RUNTIME_CALL
   2423     // Note: This macro generates exactly 22 bytes of code. The core register
    2424     // PUSH and the MOVs are 16-bit instructions, the rest are 32-bit instructions.
   2425 
   2426     push   {r0-r3, r7, lr}            // Save return address and caller-save registers.
   2427     .cfi_adjust_cfa_offset 24
   2428     .cfi_rel_offset r0, 0
   2429     .cfi_rel_offset r1, 4
   2430     .cfi_rel_offset r2, 8
   2431     .cfi_rel_offset r3, 12
   2432     .cfi_rel_offset r7, 16
   2433     .cfi_rel_offset lr, 20
   2434 
   2435     mov     r0, ip                    // Pass the reference.
   2436     vpush {s0-s15}                    // save floating-point caller-save registers
   2437     .cfi_adjust_cfa_offset 64
   2438     bl      artReadBarrierMark        // r0 <- artReadBarrierMark(obj)
   2439     vpop    {s0-s15}                  // restore floating-point registers
   2440     .cfi_adjust_cfa_offset -64
   2441     mov     ip, r0                    // Move reference to ip in preparation for return switch.
   2442 
   2443     pop     {r0-r3, r7, lr}           // Restore registers.
   2444     .cfi_adjust_cfa_offset -24
   2445     .cfi_restore r0
   2446     .cfi_restore r1
   2447     .cfi_restore r2
   2448     .cfi_restore r3
   2449     .cfi_restore r7
   2450     .cfi_restore lr
   2451 .endm
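
         // For illustration, the 22 bytes break down as PUSH (2) + MOV (2) +
         // VPUSH (4) + BL (4) + VPOP (4) + MOV (2) + POP (4); the POP is 32-bit
         // since LR cannot be encoded in a 16-bit POP register list.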
   2452 
   2453 .macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
    2454     // If the reference is null, just return it in the right register.
   2455     cmp     ip, #0
   2456     beq     .Lmark_introspection_return\label_suffix
    2457     // Use R4 as a temp and check the mark bit of the reference.
   2458     ldr     r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
   2459     tst     r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
   2460     beq     .Lmark_introspection_unmarked\label_suffix
   2461 .Lmark_introspection_return\label_suffix:
   2462 .endm
   2463 
   2464 .macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
   2465 .Lmark_introspection_unmarked\label_suffix:
    2466     // Check whether the top two bits are both one; if so, this is a forwarding address.
   2467 #if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
   2468     // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
   2469     // the highest bits and the "forwarding address" state to have all bits set.
   2470 #error "Unexpected lock word state shift or forwarding address state value."
   2471 #endif
   2472     cmp     r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
   2473     bhs     .Lmark_introspection_forwarding_address\label_suffix
   2474 .endm
   2475 
   2476 .macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
   2477 .Lmark_introspection_forwarding_address\label_suffix:
    2478     // Note: This macro generates exactly 6 bytes of code; the branch is near.
   2479 
   2480     // Shift left by the forwarding address shift. This clears out the state bits since they are
   2481     // in the top 2 bits of the lock word.
   2482     lsl     ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
   2483     b       .Lmark_introspection_return\label_suffix
   2484 .endm
   2485 
   2486 .macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
    2487     // Load the half of the instruction that contains Rt. Adjust for the Thumb mode bit in LR.
   2488     ldrh    r4, [lr, #(-1 + \ldr_offset + 2)]
   2489 .endm
   2490 
   2491 .macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
    2492     // Load the 16-bit instruction. Adjust for the Thumb mode bit in LR.
   2493     ldrh    r4, [lr, #(-1 + \ldr_offset)]
   2494 .endm
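
         // For illustration: in the wide (T3) LDR encoding Rt occupies bits 12-15
         // of the second halfword, hence the "+ 2" above and the "lsr #12" at the
         // extraction label; in the narrow (T1) encoding Rt occupies bits 0-2,
         // hence the "and #7".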
   2495 
   2496 .macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
   2497     .balign 64
   2498     .thumb_func
   2499     .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
   2500     .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
   2501     .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
   2502 art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
   2503     BRBMI_RUNTIME_CALL
   2504     // Load the LDR (or the half of it) that contains Rt.
   2505     BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
   2506     b       .Lmark_introspection_extract_register_and_return\label_suffix
   2507     // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
   2508     // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
    2509     // the 6-byte forwarding address extraction here across the 32-byte boundary.
   2510     BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
    2511     // The slow path, taking exactly 30 bytes (6 bytes for the forwarding
    2512     // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
    2513     // branch), takes the rest of the 32-byte section (within a cache line).
   2514     BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
   2515     BRBMI_RUNTIME_CALL
   2516     b       .Lmark_introspection_return\label_suffix
   2517 .endm
   2518 
   2519     /*
   2520      * Use introspection to load a reference from the same address as the LDR
   2521      * instruction in generated code would load (unless loaded by the thunk,
   2522      * see below), call ReadBarrier::Mark() with that reference if needed
   2523      * and return it in the same register as the LDR instruction would load.
   2524      *
   2525      * The entrypoint is called through a thunk that differs across load kinds.
   2526      * For field and array loads the LDR instruction in generated code follows
   2527      * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
   2528      * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
   2529      * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
   2530      * knows the holder and performs the gray bit check, returning to the LDR
   2531      * instruction if the object is not gray, so this entrypoint no longer
   2532      * needs to know anything about the holder. For GC root loads, the LDR
   2533      * instruction in generated code precedes the branch to the thunk, i.e. the
   2534      * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
   2535      * where the -1 is again the Thumb mode bit adjustment, and the thunk does
   2536      * not do the gray bit check.
   2537      *
   2538      * For field accesses and array loads with a constant index the thunk loads
   2539      * the reference into IP using introspection and calls the main entrypoint,
   2540      * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
   2541      * the passed reference is poisoned.
   2542      *
    2543      * For array accesses with a non-constant index, the thunk inserts bits
    2544      * 0-5 of the LDR instruction into the entrypoint address, effectively
   2545      * calculating a switch case label based on the index register (bits 0-3)
   2546      * and adding an extra offset (bits 4-5 hold the shift which is always 2
   2547      * for reference loads) to differentiate from the main entrypoint, then
   2548      * moves the base register to IP and jumps to the switch case. Therefore
   2549      * we need to align the main entrypoint to 512 bytes, accounting for
   2550      * a 256-byte offset followed by 16 array entrypoints starting at
   2551      * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
   2552      * (register) and a branch to the main entrypoint.
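              *
              * For example (derived from the layout described here and the 8-byte
              * array cases of BRBMI_ARRAY_LOAD above): an array load indexed by R5
              * has Rm = 5 in bits 0-3 and the shift 2 in bits 4-5, so the thunk
              * inserts (2 << 4) | 5 = 0x25, reaching the case at the main
              * entrypoint plus 0x25 * 8 = 256 + 8 * 5 bytes in
              * art_quick_read_barrier_mark_introspection_arrays.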
   2553      *
   2554      * For GC root accesses we cannot use the main entrypoint because of the
   2555      * different offset where the LDR instruction in generated code is located.
   2556      * (And even with heap poisoning enabled, GC roots are not poisoned.)
   2557      * To re-use the same entrypoint pointer in generated code, we make sure
    2558      * that the GC root entrypoint (a copy of the entrypoint with a different
   2559      * offset for introspection loads) is located at a known offset (128 bytes,
   2560      * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
   2561      * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
   2562      * the root register to IP and jumps to the customized entrypoint,
   2563      * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
   2564      * performs all the fast-path checks, so we need just the slow path.
   2565      *
   2566      * The code structure is
   2567      *   art_quick_read_barrier_mark_introspection:
   2568      *     Up to 32 bytes code for main entrypoint fast-path code for fields
   2569      *     (and array elements with constant offset) with LDR encoding T3;
   2570      *     jumps to the switch in the "narrow" entrypoint.
   2571      *     Padding to 32 bytes if needed.
   2572      *   art_quick_read_barrier_mark_introspection_narrow:
   2573      *     Up to 48 bytes code for fast path code for fields (and array
   2574      *     elements with constant offset) with LDR encoding T1, ending in the
   2575      *     return switch instruction TBB and the table with switch offsets.
   2576      *     Padding to 80 bytes if needed.
   2577      *   .Lmark_introspection_return_switch_case_r0:
   2578      *     Exactly 48 bytes of code for the return switch cases (12 cases,
   2579      *     including BKPT for the reserved registers).
   2580      *     Ends at 128 bytes total.
   2581      *   art_quick_read_barrier_mark_introspection_gc_roots_wide:
   2582      *     GC root entrypoint code for LDR encoding T3 (28 bytes).
   2583      *     Forwarding address extraction for LDR encoding T3 (6 bytes).
   2584      *     Slow path for main entrypoint for LDR encoding T3 (30 bytes).
   2585      *     Ends at 192 bytes total.
   2586      *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:
   2587      *     GC root entrypoint code for LDR encoding T1 (28 bytes).
   2588      *     Forwarding address extraction for LDR encoding T1 (6 bytes).
   2589      *     Slow path for main entrypoint for LDR encoding T1 (30 bytes).
   2590      *     Ends at 256 bytes total.
   2591      *   art_quick_read_barrier_mark_introspection_arrays:
   2592      *     Exactly 128 bytes for array load switch cases (16x2 instructions).
   2593      */
   2594     .balign 512
   2595 ENTRY art_quick_read_barrier_mark_introspection
    2596     // At this point, IP contains the reference and R4 can be freely used.
    2597     // (R4 is reserved for the entrypoint address, so it never holds a reference.)
   2598     // For heap poisoning, the reference is poisoned, so unpoison it first.
   2599     UNPOISON_HEAP_REF ip
    2600     // Check for null or marked, lock word is loaded into R4.
   2601     BRBMI_CHECK_NULL_AND_MARKED _wide
   2602     // Load the half of the instruction that contains Rt.
   2603     BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
   2604 .Lmark_introspection_extract_register_and_return_wide:
   2605     lsr     r4, r4, #12               // Extract `ref_reg`.
   2606     b       .Lmark_introspection_return_switch
   2607 
   2608     .balign 32
   2609     .thumb_func
   2610     .type art_quick_read_barrier_mark_introspection_narrow, #function
   2611     .hidden art_quick_read_barrier_mark_introspection_narrow
   2612     .global art_quick_read_barrier_mark_introspection_narrow
   2613 art_quick_read_barrier_mark_introspection_narrow:
    2614     // At this point, IP contains the reference and R4 can be freely used.
    2615     // (R4 is reserved for the entrypoint address, so it never holds a reference.)
   2616     // For heap poisoning, the reference is poisoned, so unpoison it first.
   2617     UNPOISON_HEAP_REF ip
   2618     // Check for null or marked, lock word is loaded into R4.
   2619     BRBMI_CHECK_NULL_AND_MARKED _narrow
   2620     // Load the 16-bit instruction.
   2621     BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
   2622 .Lmark_introspection_extract_register_and_return_narrow:
   2623     and     r4, r4, #7                // Extract `ref_reg`.
   2624 .Lmark_introspection_return_switch:
   2625     tbb     [pc, r4]                  // Jump to the switch case.
   2626 .Lmark_introspection_return_table:
   2627     BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
   2628     .balign 16
   2629     BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE
   2630 
   2631     BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
   2632     BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow
   2633 
   2634     .balign 256
   2635     .thumb_func
   2636     .type art_quick_read_barrier_mark_introspection_arrays, #function
   2637     .hidden art_quick_read_barrier_mark_introspection_arrays
   2638     .global art_quick_read_barrier_mark_introspection_arrays
   2639 art_quick_read_barrier_mark_introspection_arrays:
   2640     BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
   2641 END art_quick_read_barrier_mark_introspection
   2642 
   2643 .extern artInvokePolymorphic
   2644 ENTRY art_quick_invoke_polymorphic
   2645     SETUP_SAVE_REFS_AND_ARGS_FRAME r2
   2646     mov     r2, r9                 @ pass Thread::Current
   2647     mov     r3, sp                 @ pass SP
    2648     mov     r0, #0                 @ initialize the 64-bit JValue to zero.
   2649     str     r0, [sp, #-4]!
   2650     .cfi_adjust_cfa_offset 4
   2651     str     r0, [sp, #-4]!
   2652     .cfi_adjust_cfa_offset 4
   2653     mov     r0, sp                 @ pass JValue for return result as first argument.
   2654     bl      artInvokePolymorphic   @ artInvokePolymorphic(JValue, receiver, Thread*, SP)
    2655     sub     r0, 'A'                @ return value is the descriptor of the handle's return type.
    2656     cmp     r0, 'Z' - 'A'          @ check that the value is in bounds of the handler table
    2657     bgt     .Lcleanup_and_return   @ and clean up if not.
   2658     adr     r1, .Lhandler_table
   2659     tbb     [r0, r1]               @ branch to handler for return value based on return type.
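             @ For example, a handle whose return type is double yields descriptor 'D',
             @ so r0 = 'D' - 'A' = 3 selects the fourth entry in .Lhandler_table,
             @ branching to .Lstore_double_result.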
   2660 
   2661 .Lstart_of_handlers:
   2662 .Lstore_boolean_result:
   2663     ldrb    r0, [sp]               @ Copy boolean value to return value of this function.
   2664     b       .Lcleanup_and_return
   2665 .Lstore_char_result:
   2666     ldrh    r0, [sp]               @ Copy char value to return value of this function.
   2667     b       .Lcleanup_and_return
   2668 .Lstore_float_result:
   2669     vldr    s0, [sp]               @ Copy float value from JValue result to the context restored by
   2670     vstr    s0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
   2671     b       .Lcleanup_and_return
   2672 .Lstore_double_result:
   2673     vldr    d0, [sp]               @ Copy double value from JValue result to the context restored by
   2674     vstr    d0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
   2675     b       .Lcleanup_and_return
   2676 .Lstore_long_result:
   2677     ldr     r1, [sp, #4]           @ Copy the upper bits from JValue result to the context restored by
   2678     str     r1, [sp, #80]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
   2679     // Fall-through for lower bits.
   2680 .Lstore_int_result:
   2681     ldr     r0, [sp]               @ Copy int value to return value of this function.
   2682     // Fall-through to clean up and return.
   2683 .Lcleanup_and_return:
   2684     add     sp, #8
   2685     .cfi_adjust_cfa_offset -8
   2686     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   2687     REFRESH_MARKING_REGISTER
   2688     RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2
   2689 
   2690 .macro HANDLER_TABLE_OFFSET handler_label
   2691     .byte (\handler_label - .Lstart_of_handlers) / 2
   2692 .endm
   2693 
   2694 .Lhandler_table:
   2695     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
   2696     HANDLER_TABLE_OFFSET(.Lstore_int_result)      // B (byte)
   2697     HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
   2698     HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
   2699     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
   2700     HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
   2701     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
   2702     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
   2703     HANDLER_TABLE_OFFSET(.Lstore_int_result)      // I (int)
   2704     HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
   2705     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
   2706     HANDLER_TABLE_OFFSET(.Lstore_int_result)      // L (object)
   2707     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
   2708     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
   2709     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
   2710     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
   2711     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
   2712     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
   2713     HANDLER_TABLE_OFFSET(.Lstore_int_result)      // S (short)
   2714     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
   2715     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
   2716     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
   2717     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
   2718     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
   2719     HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
   2720     HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
   2721 .purgem HANDLER_TABLE_OFFSET
   2722 END art_quick_invoke_polymorphic
   2723 
    2724 // Wrap ExecuteSwitchImpl in an assembly method that specifies the DEX PC for unwinding.
    2725 //  Argument 0: r0: The context pointer for ExecuteSwitchImpl.
    2726 //  Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call.
    2727 //  Argument 2: r2: The value of DEX PC (memory address of the method's bytecode).
   2728 ENTRY ExecuteSwitchImplAsm
   2729     push {r4, lr}                                 // 2 words of callee saves.
   2730     .cfi_adjust_cfa_offset 8
   2731     .cfi_rel_offset r4, 0
   2732     .cfi_rel_offset lr, 4
   2733     mov r4, r2                                    // r4 = DEX PC
   2734     CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0)
   2735     blx r1                                        // Call the wrapped method.
   2736     pop {r4, pc}
   2737 END ExecuteSwitchImplAsm
   2738