/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm64.S"

#include "arch/quick_alloc_entrypoints.S"


.macro INCREASE_FRAME frame_adjustment
    sub sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset (\frame_adjustment)
.endm

.macro DECREASE_FRAME frame_adjustment
    add sp, sp, #(\frame_adjustment)
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

.macro SAVE_REG reg, offset
    str \reg, [sp, #(\offset)]
    .cfi_rel_offset \reg, (\offset)
.endm

.macro RESTORE_REG reg, offset
    ldr \reg, [sp, #(\offset)]
    .cfi_restore \reg
.endm

.macro SAVE_TWO_REGS reg1, reg2, offset
    stp \reg1, \reg2, [sp, #(\offset)]
    .cfi_rel_offset \reg1, (\offset)
    .cfi_rel_offset \reg2, (\offset) + 8
.endm

.macro RESTORE_TWO_REGS reg1, reg2, offset
    ldp \reg1, \reg2, [sp, #(\offset)]
    .cfi_restore \reg1
    .cfi_restore \reg2
.endm

.macro SAVE_TWO_REGS_INCREASE_FRAME reg1, reg2, frame_adjustment
    stp \reg1, \reg2, [sp, #-(\frame_adjustment)]!
    .cfi_adjust_cfa_offset (\frame_adjustment)
    .cfi_rel_offset \reg1, 0
    .cfi_rel_offset \reg2, 8
.endm

.macro RESTORE_TWO_REGS_DECREASE_FRAME reg1, reg2, frame_adjustment
    ldp \reg1, \reg2, [sp], #(\frame_adjustment)
    .cfi_restore \reg1
    .cfi_restore \reg2
    .cfi_adjust_cfa_offset -(\frame_adjustment)
.endm

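// The macros above pair every stack adjustment and register spill with the
// matching CFI directive, so the unwinder always knows the current CFA and
// where each callee-save was stored. Illustrative sketch (not assembled;
// operands are hypothetical) of what the pre-indexed variant records:
//
//     SAVE_TWO_REGS_INCREASE_FRAME x19, x20, 16
//         // stp x19, x20, [sp, #-16]!   sp -= 16, spill both registers
//         // .cfi_adjust_cfa_offset 16   CFA is now sp + 16
//         // .cfi_rel_offset x19, 0      x19 saved at [sp + 0]
//         // .cfi_rel_offset x20, 8      x20 saved at [sp + 8]
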
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveAllCalleeSaves];
    ldr xIP0, [xIP0, RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]

    INCREASE_FRAME 176

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 176)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP callee-saves.
    stp d8, d9,   [sp, #16]
    stp d10, d11, [sp, #32]
    stp d12, d13, [sp, #48]
    stp d14, d15, [sp, #64]

    // GP callee-saves
    SAVE_TWO_REGS x19, x20, 80
    SAVE_TWO_REGS x21, x22, 96
    SAVE_TWO_REGS x23, x24, 112
    SAVE_TWO_REGS x25, x26, 128
    SAVE_TWO_REGS x27, x28, 144
    SAVE_TWO_REGS x29, xLR, 160

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveAllCalleeSaves].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

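// The GOT-based lookup at the top of SETUP_SAVE_ALL_CALLEE_SAVES_FRAME, in
// the C++-style pseudocode this file already uses for comments (a sketch,
// not the runtime's real declarations):
//
//   art::Runtime** slot = &art::Runtime::instance_;   // adrp + ldr via :got:
//   art::Runtime* runtime = *slot;                    // ldr xIP0, [xIP0]
//   ArtMethod* method =
//       runtime->callee_save_methods_[kSaveAllCalleeSaves];  // ldr + offset
//   // 'method' is then stored at [sp] as the frame's ArtMethod*.
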
    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsOnly];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]

    INCREASE_FRAME 96

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 96)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM64) size not as expected."
#endif

    // GP callee-saves.
    // x20 paired with ArtMethod* - see below.
    SAVE_TWO_REGS x21, x22, 16
    SAVE_TWO_REGS x23, x24, 32
    SAVE_TWO_REGS x25, x26, 48
    SAVE_TWO_REGS x27, x28, 64
    SAVE_TWO_REGS x29, xLR, 80

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsOnly].
    stp xIP0, x20, [sp]
    .cfi_rel_offset x20, 8

    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_ONLY_FRAME
    // Callee-saves.
    RESTORE_REG x20, 8
    RESTORE_TWO_REGS x21, x22, 16
    RESTORE_TWO_REGS x23, x24, 32
    RESTORE_TWO_REGS x25, x26, 48
    RESTORE_TWO_REGS x27, x28, 64
    RESTORE_TWO_REGS x29, xLR, 80

    DECREASE_FRAME 96
.endm

.macro POP_SAVE_REFS_ONLY_FRAME
    DECREASE_FRAME 96
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
    RESTORE_SAVE_REFS_ONLY_FRAME
    ret
.endm


.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    INCREASE_FRAME 224

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 224)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM64) size not as expected."
#endif

    // Stack alignment filler [sp, #8].
    // FP args.
    stp d0, d1, [sp, #16]
    stp d2, d3, [sp, #32]
    stp d4, d5, [sp, #48]
    stp d6, d7, [sp, #64]

    // Core args.
    SAVE_TWO_REGS x1, x2, 80
    SAVE_TWO_REGS x3, x4, 96
    SAVE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    SAVE_TWO_REGS x7, x20, 128
    SAVE_TWO_REGS x21, x22, 144
    SAVE_TWO_REGS x23, x24, 160
    SAVE_TWO_REGS x25, x26, 176
    SAVE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    SAVE_TWO_REGS x29, xLR, 208

.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     *
     * TODO This is probably too conservative - saving FP & LR.
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME
    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    // Our registers aren't intermixed - just spill in order.
    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveRefsAndArgs];
    ldr xIP0, [xIP0, RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]

    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL

    str xIP0, [sp]    // Store ArtMethod* Runtime::callee_save_methods_[kSaveRefsAndArgs].
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_INTERNAL
    str x0, [sp, #0]  // Store ArtMethod* to bottom of stack.
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

// TODO: Probably no need to restore registers preserved by aapcs64.
.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    // FP args.
    ldp d0, d1, [sp, #16]
    ldp d2, d3, [sp, #32]
    ldp d4, d5, [sp, #48]
    ldp d6, d7, [sp, #64]

    // Core args.
    RESTORE_TWO_REGS x1, x2, 80
    RESTORE_TWO_REGS x3, x4, 96
    RESTORE_TWO_REGS x5, x6, 112

    // x7, Callee-saves.
    RESTORE_TWO_REGS x7, x20, 128
    RESTORE_TWO_REGS x21, x22, 144
    RESTORE_TWO_REGS x23, x24, 160
    RESTORE_TWO_REGS x25, x26, 176
    RESTORE_TWO_REGS x27, x28, 192

    // x29(callee-save) and LR.
    RESTORE_TWO_REGS x29, xLR, 208

    DECREASE_FRAME 224
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when the SP has already been decremented by FRAME_SIZE_SAVE_EVERYTHING
     * and saving registers x29 and LR is handled elsewhere.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 512)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM64) size not as expected."
#endif

    // Save FP registers.
    // For better performance, store d0 and d31 separately, so that all STPs are 16-byte aligned.
    str d0,       [sp, #8]
    stp d1, d2,   [sp, #16]
    stp d3, d4,   [sp, #32]
    stp d5, d6,   [sp, #48]
    stp d7, d8,   [sp, #64]
    stp d9, d10,  [sp, #80]
    stp d11, d12, [sp, #96]
    stp d13, d14, [sp, #112]
    stp d15, d16, [sp, #128]
    stp d17, d18, [sp, #144]
    stp d19, d20, [sp, #160]
    stp d21, d22, [sp, #176]
    stp d23, d24, [sp, #192]
    stp d25, d26, [sp, #208]
    stp d27, d28, [sp, #224]
    stp d29, d30, [sp, #240]
    str d31,      [sp, #256]

    // Save core registers.
    SAVE_REG            x0, 264
    SAVE_TWO_REGS  x1,  x2, 272
    SAVE_TWO_REGS  x3,  x4, 288
    SAVE_TWO_REGS  x5,  x6, 304
    SAVE_TWO_REGS  x7,  x8, 320
    SAVE_TWO_REGS  x9, x10, 336
    SAVE_TWO_REGS x11, x12, 352
    SAVE_TWO_REGS x13, x14, 368
    SAVE_TWO_REGS x15, x16, 384
    SAVE_TWO_REGS x17, x18, 400
    SAVE_TWO_REGS x19, x20, 416
    SAVE_TWO_REGS x21, x22, 432
    SAVE_TWO_REGS x23, x24, 448
    SAVE_TWO_REGS x25, x26, 464
    SAVE_TWO_REGS x27, x28, 480

    // art::Runtime** xIP0 = &art::Runtime::instance_
    adrp xIP0, :got:_ZN3art7Runtime9instance_E
    ldr xIP0, [xIP0, #:got_lo12:_ZN3art7Runtime9instance_E]

    ldr xIP0, [xIP0]  // art::Runtime* xIP0 = art::Runtime::instance_;

    // ArtMethod* xIP0 = Runtime::instance_->callee_save_methods_[kSaveEverything];
    ldr xIP0, [xIP0, RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET]

    // Store ArtMethod* Runtime::callee_save_methods_[kSaveEverything].
    str xIP0, [sp]
    // Place sp in Thread::Current()->top_quick_frame.
    mov xIP0, sp
    str xIP0, [xSELF, # THREAD_TOP_QUICK_FRAME_OFFSET]
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME
    INCREASE_FRAME 512
    SAVE_TWO_REGS x29, xLR, 496
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
    // Restore FP registers.
    // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
    ldr d0,       [sp, #8]
    ldp d1, d2,   [sp, #16]
    ldp d3, d4,   [sp, #32]
    ldp d5, d6,   [sp, #48]
    ldp d7, d8,   [sp, #64]
    ldp d9, d10,  [sp, #80]
    ldp d11, d12, [sp, #96]
    ldp d13, d14, [sp, #112]
    ldp d15, d16, [sp, #128]
    ldp d17, d18, [sp, #144]
    ldp d19, d20, [sp, #160]
    ldp d21, d22, [sp, #176]
    ldp d23, d24, [sp, #192]
    ldp d25, d26, [sp, #208]
    ldp d27, d28, [sp, #224]
    ldp d29, d30, [sp, #240]
    ldr d31,      [sp, #256]

    // Restore core registers.
    RESTORE_TWO_REGS  x1,  x2, 272
    RESTORE_TWO_REGS  x3,  x4, 288
    RESTORE_TWO_REGS  x5,  x6, 304
    RESTORE_TWO_REGS  x7,  x8, 320
    RESTORE_TWO_REGS  x9, x10, 336
    RESTORE_TWO_REGS x11, x12, 352
    RESTORE_TWO_REGS x13, x14, 368
    RESTORE_TWO_REGS x15, x16, 384
    RESTORE_TWO_REGS x17, x18, 400
    RESTORE_TWO_REGS x19, x20, 416
    RESTORE_TWO_REGS x21, x22, 432
    RESTORE_TWO_REGS x23, x24, 448
    RESTORE_TWO_REGS x25, x26, 464
    RESTORE_TWO_REGS x27, x28, 480
    RESTORE_TWO_REGS x29, xLR, 496

    DECREASE_FRAME 512
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    RESTORE_REG            x0, 264
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
.endm

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz x0, 1f                // result non-zero branch over
    ret                        // return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz x0, 1f                 // result zero branch over
    ret                        // return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov x0, xSELF

    // Point of no return.
    bl artDeliverPendingExceptionFromCode  // artDeliverPendingExceptionFromCode(Thread*)
    brk 0  // Unreached
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
    cbnz \reg, 1f
    ret
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG xIP0
.endm

// Same as above with x1. This is helpful in stubs that want to avoid clobbering another register.
.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG x1
.endm

.macro RETURN_IF_W0_IS_ZERO_OR_DELIVER
    cbnz w0, 1f                // result non-zero branch over
    ret                        // return
1:
    DELIVER_PENDING_EXCEPTION
.endm

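// The RETURN_OR_DELIVER_* and RETURN_IF_* helpers above share one pattern; in
// pseudo-C (a sketch, not the runtime's real signatures):
//
//   if (self->exception_ == nullptr)  // ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]
//     return;                         // ret
//   DELIVER_PENDING_EXCEPTION();      // set up a save-all frame, then call
//                                     // artDeliverPendingExceptionFromCode(self); never returns
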
.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x0, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(Thread*)
    brk 0
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context.
    mov x1, xSELF                     // pass Thread::Current.
    bl  \cxx_name                     // \cxx_name(arg, Thread*).
    brk 0
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME       // save all registers as basis for long jump context
    mov x2, xSELF                     // pass Thread::Current
    bl  \cxx_name                     // \cxx_name(arg1, arg2, Thread*)
    brk 0
END \c_name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    // Save all registers as basis for long jump context.
    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - __SIZEOF_POINTER__)
    SAVE_REG x29, (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)  // LR already saved.
    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
    mov x0, lr                        // pass the fault address stored in LR by the fault handler.
    mov x1, xSELF                     // pass Thread::Current.
    bl  artThrowNullPointerExceptionFromSignal  // (arg, Thread*).
    brk 0
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/x0 with the target Method*, arg0/x0 will contain
     * the method_idx.  This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is the first visible argument of the target, and so can be found in arg1/x1.
     *
     * The helper will attempt to locate the target and return a 128-bit result in x0/x1 consisting
     * of the target Method* in x0 and method->code_ in x1.
     *
     * If unsuccessful, the helper will return null/????. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Adapted from ARM32 code.
     *
     * Clobbers xIP0.
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME        // save callee saves in case allocation triggers GC
    // Helper signature is always
    // (method_idx, *this_object, *caller_method, *self, sp)

    mov    x2, xSELF                      // pass Thread::Current
    mov    x3, sp
    bl     \cxx_name                      // (method_idx, this, Thread*, SP)
    mov    xIP0, x1                       // save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    cbz    x0, 1f                         // did we find the target? if not go to exception delivery
    br     xIP0                           // tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm
.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

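// On success the trampoline helper returns a 128-bit result: the target
// ArtMethod* in x0 and its code pointer in x1. Pseudo-C sketch of the
// contract implemented by INVOKE_TRAMPOLINE_BODY (names invented):
//
//   struct Result { ArtMethod* method; void* code; };      // x0, x1
//   Result r = helper(method_idx, this_object, self, sp);
//   if (r.method == nullptr) deliver_pending_exception();  // cbz x0, 1f
//   restore_arguments();                  // RESTORE_SAVE_REFS_AND_ARGS_FRAME
//   goto *r.code;                         // br xIP0 - LR still points at the caller
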

.macro INVOKE_STUB_CREATE_FRAME

SAVE_SIZE=15*8   // x4, x5, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
SAVE_SIZE_AND_METHOD=SAVE_SIZE+8


    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    add x10, x2, # SAVE_SIZE_AND_METHOD    // calculate size of frame.
    sub x10, sp, x10                       // Calculate SP position - saves + ArtMethod* + args
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    sub x10, x9, #SAVE_SIZE                // Calculate new FP (later). Done here as we must move SP
    .cfi_def_cfa_register x10              // before this.
    .cfi_adjust_cfa_offset SAVE_SIZE

    str x28, [x10, #112]
    .cfi_rel_offset x28, 112

    stp x26, x27, [x10, #96]
    .cfi_rel_offset x26, 96
    .cfi_rel_offset x27, 104

    stp x24, x25, [x10, #80]
    .cfi_rel_offset x24, 80
    .cfi_rel_offset x25, 88

    stp x22, x23, [x10, #64]
    .cfi_rel_offset x22, 64
    .cfi_rel_offset x23, 72

    stp x20, x21, [x10, #48]
    .cfi_rel_offset x20, 48
    .cfi_rel_offset x21, 56

    stp x9, x19, [x10, #32]                // Save old stack pointer and x19.
    .cfi_rel_offset sp, 32
    .cfi_rel_offset x19, 40

    stp x4, x5, [x10, #16]                 // Save result and shorty addresses.
    .cfi_rel_offset x4, 16
    .cfi_rel_offset x5, 24

    stp xFP, xLR, [x10]                    // Store FP & LR.
    .cfi_rel_offset x29, 0
    .cfi_rel_offset x30, 8

    mov xFP, x10                           // Use xFP now, as it's callee-saved.
    .cfi_def_cfa_register x29
    mov xSELF, x3                          // Move thread pointer into SELF register.

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X1 - source address
    // W2 - args length
    // X9 - destination address.
    // W10 - temporary
    add x9, sp, #8                         // Destination address is bottom of stack + null.

    // Copy parameters into the stack. Use numeric label as this is a macro and Clang's assembler
    // does not have unique-id variables.
1:
    cmp w2, #0
    beq 2f
    sub w2, w2, #4      // Need 65536 bytes of range.
    ldr w10, [x1, x2]
    str w10, [x9, x2]

    b 1b

2:
    // Store null into ArtMethod* at bottom of frame.
    str xzr, [sp]
.endm

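// Pseudo-C sketch (illustrative) of the copy loop in INVOKE_STUB_CREATE_FRAME:
// vregs are 4 bytes each and land just above the null ArtMethod* slot:
//
//   uint32_t* dst = (uint32_t*)((char*)sp + 8);   // add x9, sp, #8
//   uint32_t n = args_length_in_bytes;            // w2
//   while (n != 0) {                              // cmp w2, #0; beq 2f
//     n -= 4;
//     dst[n / 4] = src[n / 4];                    // ldr w10, [x1, x2]; str w10, [x9, x2]
//   }
//   *(void**)sp = NULL;                           // str xzr, [sp]
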
.macro INVOKE_STUB_CALL_AND_RETURN

    // Load the method's quick code entry point (ART_METHOD_QUICK_CODE_OFFSET_64).
    ldr x9, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
    // Branch to method.
    blr x9

    // Restore return value address and shorty address.
    ldp x4,x5, [xFP, #16]
    .cfi_restore x4
    .cfi_restore x5

    ldr x28, [xFP, #112]
    .cfi_restore x28

    ldp x26, x27, [xFP, #96]
    .cfi_restore x26
    .cfi_restore x27

    ldp x24, x25, [xFP, #80]
    .cfi_restore x24
    .cfi_restore x25

    ldp x22, x23, [xFP, #64]
    .cfi_restore x22
    .cfi_restore x23

    ldp x20, x21, [xFP, #48]
    .cfi_restore x20
    .cfi_restore x21

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x5]

    // Check the return type and store the correct register into the jvalue in memory.
    // Use numeric label as this is a macro and Clang's assembler does not have unique-id variables.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq 3f

    // Is it a double?
    cmp w10, #'D'
    bne 1f
    str d0, [x4]
    b 3f

1:  // Is it a float?
    cmp w10, #'F'
    bne 2f
    str s0, [x4]
    b 3f

2:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x4]

3:  // Finish up.
    ldp x2, x19, [xFP, #32]   // Restore stack pointer and x19.
    .cfi_restore x19
    mov sp, x2
    .cfi_restore sp

    ldp xFP, xLR, [xFP]    // Restore old frame pointer and link register.
    .cfi_restore x29
    .cfi_restore x30

    ret

.endm


/*
 *  extern"C" void art_quick_invoke_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 *  +----------------------+
 *  |                      |
 *  |  C/C++ frame         |
 *  |       LR''           |
 *  |       FP''           | <- SP'
 *  +----------------------+
 *  +----------------------+
 *  |        x28           | <- TODO: Remove callee-saves.
 *  |         :            |
 *  |        x19           |
 *  |        SP'           |
 *  |        X5            |
 *  |        X4            |        Saved registers
 *  |        LR'           |
 *  |        FP'           | <- FP
 *  +----------------------+
 *  | uint32_t out[n-1]    |
 *  |    :      :          |        Outs
 *  | uint32_t out[0]      |
 *  | ArtMethod*           | <- SP  value=null
 *  +----------------------+
 *
 * Outgoing registers:
 *  x0    - Method*
 *  x1-x7 - integer parameters.
 *  d0-d7 - Floating point parameters.
 *  xSELF = self
 *  SP = & of ArtMethod*
 *  x1 = "this" pointer.
 *
 */
ENTRY art_quick_invoke_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW2
    adr  x12, .LstoreX2
    adr  x13, .LstoreS0
    adr  x14, .LstoreD0

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1         // Load shorty address, plus one to skip return value.
    ldr w1, [x9],#4         // Load "this" parameter, and increment arg pointer.

    // Loop to fill registers.
.LfillRegisters:
    ldrb w17, [x10], #1       // Load next character in signature, and increment.
    cbz w17, .LcallFunction   // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F' // is this a float?
    bne .LisDouble

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble:
    cmp w17, #'D'           // is this a double?
    bne .LisLong

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong:
    cmp w17, #'J'           // is this a long?
    bne .LisOther

    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance8

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther:                  // Everything else takes one vReg.
    cmp x8, # 6*12          // Skip this load if all registers full.
    beq .Ladvance4

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4:
    add x9, x9, #4
    b .LfillRegisters

.Ladvance8:
    add x9, x9, #8
    b .LfillRegisters

// Macro for loading a parameter into a register.
//  counter - the register with offset into these tables
//  size - the size of the register - 4 or 8 bytes.
//  register - the name of the register to be loaded.
//  return - the label to branch back to after the load.
.macro LOADREG counter size register return
    ldr \register , [x9], #\size
    add \counter, \counter, 12
    b \return
.endm

// Store ints.
.LstoreW2:
    LOADREG x8 4 w2 .LfillRegisters
    LOADREG x8 4 w3 .LfillRegisters
    LOADREG x8 4 w4 .LfillRegisters
    LOADREG x8 4 w5 .LfillRegisters
    LOADREG x8 4 w6 .LfillRegisters
    LOADREG x8 4 w7 .LfillRegisters

// Store longs.
.LstoreX2:
    LOADREG x8 8 x2 .LfillRegisters
    LOADREG x8 8 x3 .LfillRegisters
    LOADREG x8 8 x4 .LfillRegisters
    LOADREG x8 8 x5 .LfillRegisters
    LOADREG x8 8 x6 .LfillRegisters
    LOADREG x8 8 x7 .LfillRegisters

// Store singles.
.LstoreS0:
    LOADREG x15 4 s0 .LfillRegisters
    LOADREG x15 4 s1 .LfillRegisters
    LOADREG x15 4 s2 .LfillRegisters
    LOADREG x15 4 s3 .LfillRegisters
    LOADREG x15 4 s4 .LfillRegisters
    LOADREG x15 4 s5 .LfillRegisters
    LOADREG x15 4 s6 .LfillRegisters
    LOADREG x15 4 s7 .LfillRegisters

// Store doubles.
.LstoreD0:
    LOADREG x15 8 d0 .LfillRegisters
    LOADREG x15 8 d1 .LfillRegisters
    LOADREG x15 8 d2 .LfillRegisters
    LOADREG x15 8 d3 .LfillRegisters
    LOADREG x15 8 d4 .LfillRegisters
    LOADREG x15 8 d5 .LfillRegisters
    LOADREG x15 8 d6 .LfillRegisters
    LOADREG x15 8 d7 .LfillRegisters


.LcallFunction:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_stub

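// Pseudo-C sketch (illustrative; helper names invented) of the shorty-driven
// register fill in art_quick_invoke_stub. The shorty's first character is the
// return type; each following character types one argument: 'F'/'D' use FP
// registers s0-s7/d0-d7, 'J' a 64-bit GPR, anything else a 32-bit GPR. This
// stub has six free GPRs (x2-x7, since x1 carries "this"); the static stub
// below has seven (x1-x7).
//
//   for (const char* s = shorty + 1; *s != '\0'; ++s) {
//     bool fp = (*s == 'F' || *s == 'D');
//     bool wide = (*s == 'D' || *s == 'J');
//     if (fp ? fp_used < 8 : gp_used < max_gp)
//       load_next_arg(fp, wide);   // LOADREG: loads, advances arg ptr & counter
//     else
//       args += wide ? 8 : 4;      // .Ladvance8 / .Ladvance4
//   }
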
/*  extern"C"
 *     void art_quick_invoke_static_stub(ArtMethod *method,   x0
 *                                       uint32_t  *args,     x1
 *                                       uint32_t argsize,    w2
 *                                       Thread *self,        x3
 *                                       JValue *result,      x4
 *                                       char   *shorty);     x5
 */
ENTRY art_quick_invoke_static_stub
    // Spill registers as per AAPCS64 calling convention.
    INVOKE_STUB_CREATE_FRAME

    // Fill registers x/w1 to x/w7 and s/d0 to s/d7 with parameters.
    // Parse the passed shorty to determine which register to load.
    // Load addresses for routines that load WXSD registers.
    adr  x11, .LstoreW1_2
    adr  x12, .LstoreX1_2
    adr  x13, .LstoreS0_2
    adr  x14, .LstoreD0_2

    // Initialize routine offsets to 0 for integers and floats.
    // x8 for integers, x15 for floating point.
    mov x8, #0
    mov x15, #0

    add x10, x5, #1     // Load shorty address, plus one to skip return value.

    // Loop to fill registers.
.LfillRegisters2:
    ldrb w17, [x10], #1         // Load next character in signature, and increment.
    cbz w17, .LcallFunction2    // Exit at end of signature. Shorty 0 terminated.

    cmp  w17, #'F'          // is this a float?
    bne .LisDouble2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x13, x15       // Calculate subroutine to jump to.
    br  x17

.LisDouble2:
    cmp w17, #'D'           // is this a double?
    bne .LisLong2

    cmp x15, # 8*12         // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x14, x15       // Calculate subroutine to jump to.
    br x17

.LisLong2:
    cmp w17, #'J'           // is this a long?
    bne .LisOther2

    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance8_2

    add x17, x12, x8        // Calculate subroutine to jump to.
    br x17

.LisOther2:                 // Everything else takes one vReg.
    cmp x8, # 7*12          // Skip this load if all registers full.
    beq .Ladvance4_2

    add x17, x11, x8        // Calculate subroutine to jump to.
    br x17

.Ladvance4_2:
    add x9, x9, #4
    b .LfillRegisters2

.Ladvance8_2:
    add x9, x9, #8
    b .LfillRegisters2

// Store ints.
.LstoreW1_2:
    LOADREG x8 4 w1 .LfillRegisters2
    LOADREG x8 4 w2 .LfillRegisters2
    LOADREG x8 4 w3 .LfillRegisters2
    LOADREG x8 4 w4 .LfillRegisters2
    LOADREG x8 4 w5 .LfillRegisters2
    LOADREG x8 4 w6 .LfillRegisters2
    LOADREG x8 4 w7 .LfillRegisters2

// Store longs.
.LstoreX1_2:
    LOADREG x8 8 x1 .LfillRegisters2
    LOADREG x8 8 x2 .LfillRegisters2
    LOADREG x8 8 x3 .LfillRegisters2
    LOADREG x8 8 x4 .LfillRegisters2
    LOADREG x8 8 x5 .LfillRegisters2
    LOADREG x8 8 x6 .LfillRegisters2
    LOADREG x8 8 x7 .LfillRegisters2

// Store singles.
.LstoreS0_2:
    LOADREG x15 4 s0 .LfillRegisters2
    LOADREG x15 4 s1 .LfillRegisters2
    LOADREG x15 4 s2 .LfillRegisters2
    LOADREG x15 4 s3 .LfillRegisters2
    LOADREG x15 4 s4 .LfillRegisters2
    LOADREG x15 4 s5 .LfillRegisters2
    LOADREG x15 4 s6 .LfillRegisters2
    LOADREG x15 4 s7 .LfillRegisters2

// Store doubles.
.LstoreD0_2:
    LOADREG x15 8 d0 .LfillRegisters2
    LOADREG x15 8 d1 .LfillRegisters2
    LOADREG x15 8 d2 .LfillRegisters2
    LOADREG x15 8 d3 .LfillRegisters2
    LOADREG x15 8 d4 .LfillRegisters2
    LOADREG x15 8 d5 .LfillRegisters2
    LOADREG x15 8 d6 .LfillRegisters2
    LOADREG x15 8 d7 .LfillRegisters2


.LcallFunction2:

    INVOKE_STUB_CALL_AND_RETURN

END art_quick_invoke_static_stub



/*  extern"C" void art_quick_osr_stub(void** stack,                x0
 *                                    size_t stack_size_in_bytes,  x1
 *                                    const uint8_t* native_pc,    x2
 *                                    JValue *result,              x3
 *                                    char   *shorty,              x4
 *                                    Thread *self)                x5
 */
ENTRY art_quick_osr_stub
SAVE_SIZE=15*8   // x3, x4, x19, x20, x21, x22, x23, x24, x25, x26, x27, x28, SP, LR, FP saved.
    mov x9, sp                             // Save stack pointer.
    .cfi_register sp,x9

    sub x10, sp, # SAVE_SIZE
    and x10, x10, # ~0xf                   // Enforce 16 byte stack alignment.
    mov sp, x10                            // Set new SP.

    str x28, [sp, #112]
    stp x26, x27, [sp, #96]
    stp x24, x25, [sp, #80]
    stp x22, x23, [sp, #64]
    stp x20, x21, [sp, #48]
    stp x9, x19, [sp, #32]                // Save old stack pointer and x19.
    stp x3, x4, [sp, #16]                 // Save result and shorty addresses.
    stp xFP, xLR, [sp]                    // Store FP & LR.
    mov xSELF, x5                         // Move thread pointer into SELF register.

    sub sp, sp, #16
    str xzr, [sp]                         // Store null for ArtMethod* slot
    // Call the local OSR entry below.
    bl .Losr_entry
    add sp, sp, #16

    // Restore return value address and shorty address.
    ldp x3,x4, [sp, #16]
    ldr x28, [sp, #112]
    ldp x26, x27, [sp, #96]
    ldp x24, x25, [sp, #80]
    ldp x22, x23, [sp, #64]
    ldp x20, x21, [sp, #48]

    // Store result (w0/x0/s0/d0) appropriately, depending on resultType.
    ldrb w10, [x4]

    // Check the return type and store the correct register into the jvalue in memory.

    // Don't set anything for a void type.
    cmp w10, #'V'
    beq .Losr_exit

    // Is it a double?
    cmp w10, #'D'
    bne .Lno_double
    str d0, [x3]
    b .Losr_exit

.Lno_double:  // Is it a float?
    cmp w10, #'F'
    bne .Lno_float
    str s0, [x3]
    b .Losr_exit

.Lno_float:  // Just store x0. Doesn't matter if it is 64 or 32 bits.
    str x0, [x3]

.Losr_exit:  // Finish up.
    ldp x2, x19, [sp, #32]   // Restore stack pointer and x19.
    ldp xFP, xLR, [sp]    // Restore old frame pointer and link register.
    mov sp, x2
    ret

.Losr_entry:
    // Update stack pointer for the callee
    sub sp, sp, x1

    // Update link register slot expected by the callee.
    sub w1, w1, #8
    str lr, [sp, x1]

    // Copy arguments into stack frame.
    // Use simple copy routine for now.
    // 4 bytes per slot.
    // X0 - source address
    // W1 - args length
    // SP - destination address.
    // W10 - temporary
.Losr_loop_entry:
    cmp w1, #0
    beq .Losr_loop_exit
    sub w1, w1, #4
    ldr w10, [x0, x1]
    str w10, [sp, x1]
    b .Losr_loop_entry

.Losr_loop_exit:
    // Branch to the OSR entry point.
    br x2

END art_quick_osr_stub

    /*
     * On entry x0 is uintptr_t* gprs_ and x1 is uint64_t* fprs_
     */

ENTRY art_quick_do_long_jump
    // Load FPRs
    ldp d0, d1, [x1], #16
    ldp d2, d3, [x1], #16
    ldp d4, d5, [x1], #16
    ldp d6, d7, [x1], #16
    ldp d8, d9, [x1], #16
    ldp d10, d11, [x1], #16
    ldp d12, d13, [x1], #16
    ldp d14, d15, [x1], #16
    ldp d16, d17, [x1], #16
    ldp d18, d19, [x1], #16
    ldp d20, d21, [x1], #16
    ldp d22, d23, [x1], #16
    ldp d24, d25, [x1], #16
    ldp d26, d27, [x1], #16
    ldp d28, d29, [x1], #16
    ldp d30, d31, [x1]

    // Load GPRs
    // TODO: lots of those are smashed, could optimize.
    add x0, x0, #30*8
    ldp x30, x1, [x0], #-16          // LR & SP
    ldp x28, x29, [x0], #-16
    ldp x26, x27, [x0], #-16
    ldp x24, x25, [x0], #-16
    ldp x22, x23, [x0], #-16
    ldp x20, x21, [x0], #-16
    ldp x18, x19, [x0], #-16
    ldp x16, x17, [x0], #-16
    ldp x14, x15, [x0], #-16
    ldp x12, x13, [x0], #-16
    ldp x10, x11, [x0], #-16
    ldp x8, x9, [x0], #-16
    ldp x6, x7, [x0], #-16
    ldp x4, x5, [x0], #-16
    ldp x2, x3, [x0], #-16
    mov sp, x1

    // Need to load PC, it's at the end (after the space for the unused XZR). Use x1.
    ldr x1, [x0, #33*8]
    // And the value of x0.
    ldr x0, [x0]

    br  x1
END art_quick_do_long_jump

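    /*
     * Layout of the context consumed by art_quick_do_long_jump, inferred from
     * the offsets above (a sketch; the authoritative definition is the arm64
     * context class in the runtime):
     *
     *   uintptr_t gprs_[34];  // [0..30] = x0..x30, [31] = SP,
     *                         // [32] = placeholder for XZR, [33] = PC
     *   uint64_t  fprs_[32];  // d0..d31
     */
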
    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. x0 holds the
     * possibly null object to lock.
     *
     * Derived from arm32 code.
     */
    .extern artLockObjectFromCode
ENTRY art_quick_lock_object
    cbz    w0, .Lslow_lock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_lock:
    ldr    w2, [xSELF, #THREAD_ID_OFFSET] // TODO: Can the thread ID really change during the loop?
    ldaxr  w1, [x4]                   // acquire needed only in most common case
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cbnz   w3, .Lnot_unlocked         // already thin locked
    // unlocked case - x1: original lock word that's zero except for the read barrier bits.
    orr    x2, x1, x2                 // x2 holds thread id with count of 0 with preserved read barrier bits
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Lnot_unlocked:  // x1: original lock word
    lsr    w3, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w3, .Lslow_lock            // if either of the top two bits are set, go slow path
    eor    w2, w1, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w2, w2                     // zero top 16 bits
    cbnz   w2, .Lslow_lock            // if the thread ids don't match, contention: go slow path
                                      // else they match: recursive thin lock, fall through
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits.
    add    w2, w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count in lock word placing in w2 to check overflow
    lsr    w3, w2, #LOCK_WORD_GC_STATE_SHIFT     // if the first gc state bit is set, we overflowed.
    cbnz   w3, .Lslow_lock            // if we overflow the count go slow path
    add    w2, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // increment count for real
    stxr   w3, w2, [x4]
    cbnz   w3, .Llock_stxr_fail       // store failed, retry
    ret
.Llock_stxr_fail:
    b      .Lretry_lock               // retry
.Lslow_lock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object

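// Pseudo-C sketch (illustrative; the authoritative bit layout is in the
// LOCK_WORD_* constants) of the thin-lock fast path in art_quick_lock_object:
//
//   uint32_t lw = load_acquire(lock_word_addr);        // ldaxr w1, [x4]
//   if ((lw & ~GC_STATE_BITS) == 0) {                  // unlocked?
//     uint32_t owned = lw | self_thread_id;            // owner id, count 0, GC bits kept
//     if (store_exclusive_ok(lock_word_addr, owned)) return;
//     goto retry;                                      // stxr failed
//   }
//   // Otherwise: fat lock or another owner -> slow path; same owner -> bump
//   // the recursion count, checking it does not overflow into the GC bits.
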
ENTRY art_quick_lock_object_no_inline
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case we block
    mov    x1, xSELF                  // pass Thread::Current
    bl     artLockObjectFromCode      // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_lock_object_no_inline

    /*
     * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure.
     * x0 holds the possibly null object to unlock.
     *
     * Derived from arm32 code.
     */
    .extern artUnlockObjectFromCode
ENTRY art_quick_unlock_object
    cbz    x0, .Lslow_unlock
    add    x4, x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET  // exclusive load/store has no immediate anymore
.Lretry_unlock:
#ifndef USE_READ_BARRIER
    ldr    w1, [x4]
#else
    ldxr   w1, [x4]                   // Need to use atomic instructions for read barrier
#endif
    lsr    w2, w1, LOCK_WORD_STATE_SHIFT
    cbnz   w2, .Lslow_unlock          // if either of the top two bits are set, go slow path
    ldr    w2, [xSELF, #THREAD_ID_OFFSET]
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    eor    w3, w3, w2                 // lock_word.ThreadId() ^ self->ThreadId()
    uxth   w3, w3                     // zero top 16 bits
    cbnz   w3, .Lslow_unlock          // if lock word and self thread ids don't match, go slow path
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED  // zero the gc bits
    cmp    w3, #LOCK_WORD_THIN_LOCK_COUNT_ONE
    bpl    .Lrecursive_thin_unlock
    // transition to unlocked
    and    w3, w1, #LOCK_WORD_GC_STATE_MASK_SHIFTED  // w3: zero except for the preserved read barrier bits
#ifndef USE_READ_BARRIER
    stlr   w3, [x4]
#else
    stlxr  w2, w3, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lrecursive_thin_unlock:  // w1: original lock word
    sub    w1, w1, #LOCK_WORD_THIN_LOCK_COUNT_ONE  // decrement count
#ifndef USE_READ_BARRIER
    str    w1, [x4]
#else
    stxr   w2, w1, [x4]               // Need to use atomic instructions for read barrier
    cbnz   w2, .Lunlock_stxr_fail     // store failed, retry
#endif
    ret
.Lunlock_stxr_fail:
    b      .Lretry_unlock             // retry
.Lslow_unlock:
    SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case exception allocation triggers GC
    mov    x1, xSELF                  // pass Thread::Current
    bl     artUnlockObjectFromCode    // (Object* obj, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    RETURN_IF_W0_IS_ZERO_OR_DELIVER
END art_quick_unlock_object

   1289     /*
   1290      * Entry from managed code that calls artInstanceOfFromCode and on failure calls
   1291      * artThrowClassCastExceptionForObject.
   1292      */
   1293     .extern artInstanceOfFromCode
   1294     .extern artThrowClassCastExceptionForObject
   1295 ENTRY art_quick_check_instance_of
   1296     // Store arguments and link register
   1297     // Stack needs to be 16B aligned on calls.
   1298     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
   1299     SAVE_REG xLR, 24
   1300 
   1301     // Call runtime code
   1302     bl artInstanceOfFromCode
   1303 
   1304     // Check for exception
   1305     cbz x0, .Lthrow_class_cast_exception
   1306 
   1307     // Restore and return
   1308     .cfi_remember_state
   1309     RESTORE_REG xLR, 24
   1310     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
   1311     ret
   1312     .cfi_restore_state                // Reset unwind info so following code unwinds.
   1313     .cfi_def_cfa_offset 32            // workaround for clang bug: 31975598
   1314 
   1315 .Lthrow_class_cast_exception:
   1316     // Restore
   1317     RESTORE_REG xLR, 24
   1318     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
   1319 
   1320     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
   1321     mov x2, xSELF                     // pass Thread::Current
   1322     bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
   1323     brk 0                             // We should not return here...
   1324 END art_quick_check_instance_of
   1325 
   1326 // Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
   1327 .macro POP_REG_NE xReg, offset, xExclude
   1328     .ifnc \xReg, \xExclude
   1329         ldr \xReg, [sp, #\offset]     // restore xReg
   1330         .cfi_restore \xReg
   1331     .endif
   1332 .endm
   1333 
   1334 // Restore xReg1's value from [sp, #offset] if xReg1 is not the same as xExclude.
   1335 // Restore xReg2's value from [sp, #(offset + 8)] if xReg2 is not the same as xExclude.
   1336 .macro POP_REGS_NE xReg1, xReg2, offset, xExclude
   1337     .ifc \xReg1, \xExclude
   1338         ldr \xReg2, [sp, #(\offset + 8)]        // restore xReg2
   1339     .else
   1340         .ifc \xReg2, \xExclude
   1341             ldr \xReg1, [sp, #\offset]          // restore xReg1
   1342         .else
   1343             ldp \xReg1, \xReg2, [sp, #\offset]  // restore xReg1 and xReg2
   1344         .endif
   1345     .endif
   1346     .cfi_restore \xReg1
   1347     .cfi_restore \xReg2
   1348 .endm
   1349 
   1350     /*
   1351      * Macro to insert read barrier, only used in art_quick_aput_obj.
   1352      * xDest, wDest and xObj are registers, offset is a defined literal such as
   1353      * MIRROR_OBJECT_CLASS_OFFSET. Dest needs both x and w versions of the same register to handle
   1354      * name mismatch between instructions. This macro uses the lower 32b of register when possible.
   1355      * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
   1356      */
   1357 .macro READ_BARRIER xDest, wDest, xObj, xTemp, wTemp, offset, number
   1358 #ifdef USE_READ_BARRIER
   1359 #ifdef USE_BAKER_READ_BARRIER
   1360     ldr \wTemp, [\xObj, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
   1361     tbnz \wTemp, #LOCK_WORD_READ_BARRIER_STATE_SHIFT, .Lrb_slowpath\number
   1362     // False dependency to avoid needing load/load fence.
   1363     add \xObj, \xObj, \xTemp, lsr #32
   1364     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
   1365     UNPOISON_HEAP_REF \wDest
   1366     b .Lrb_exit\number
   1367 #endif
   1368 .Lrb_slowpath\number:
   1369     // Store registers used in art_quick_aput_obj (x0-x4, LR), stack is 16B aligned.
   1370     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 48
   1371     SAVE_TWO_REGS x2, x3, 16
   1372     SAVE_TWO_REGS x4, xLR, 32
   1373 
   1374     // mov x0, \xRef                // pass ref in x0 (no-op for now since parameter ref is unused)
   1375     .ifnc \xObj, x1
   1376         mov x1, \xObj               // pass xObj
   1377     .endif
   1378     mov w2, #\offset                // pass offset
   1379     bl artReadBarrierSlow           // artReadBarrierSlow(ref, xObj, offset)
   1380     // No need to unpoison return value in w0, artReadBarrierSlow() would do the unpoisoning.
   1381     .ifnc \wDest, w0
   1382         mov \wDest, w0              // save return value in wDest
   1383     .endif
   1384 
   1385     // Conditionally restore saved registers
   1386     POP_REG_NE x0, 0, \xDest
   1387     POP_REG_NE x1, 8, \xDest
   1388     POP_REG_NE x2, 16, \xDest
   1389     POP_REG_NE x3, 24, \xDest
   1390     POP_REG_NE x4, 32, \xDest
   1391     RESTORE_REG xLR, 40
   1392     DECREASE_FRAME 48
   1393 .Lrb_exit\number:
   1394 #else
   1395     ldr \wDest, [\xObj, #\offset]   // Heap reference = 32b. This also zero-extends to \xDest.
   1396     UNPOISON_HEAP_REF \wDest
   1397 #endif  // USE_READ_BARRIER
   1398 .endm
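
             /*
              * A minimal C sketch of the READ_BARRIER logic above, assuming Baker read barriers
              * (USE_BAKER_READ_BARRIER). Except for artReadBarrierSlow, every name below is an
              * illustrative stand-in rather than a real ART declaration:
              *
              *   uint32_t ReadBarrier(Object* obj, uint32_t offset) {
              *     uint32_t lock_word = obj->monitor;               // MIRROR_OBJECT_LOCK_WORD_OFFSET
              *     if ((lock_word >> kRbStateShift) & 1)            // Gray object: the reference may
              *       return artReadBarrierSlow(NULL, obj, offset);  // be stale, let the runtime mark it.
              *     // Fast path: plain 32-bit reference load. In the asm, the false data dependency
              *     // on the lock word stands in for a load/load fence here.
              *     return UnpoisonHeapRef(*(uint32_t*)((char*)obj + offset));
              *   }
              */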
   1399 
   1400 #ifdef USE_READ_BARRIER
   1401     .extern artReadBarrierSlow
   1402 #endif
   1403 ENTRY art_quick_aput_obj
   1404     cbz x2, .Ldo_aput_null
   1405     READ_BARRIER x3, w3, x0, x3, w3, MIRROR_OBJECT_CLASS_OFFSET, 0  // Heap reference = 32b
   1406                                                                     // This also zero-extends to x3
   1407     READ_BARRIER x3, w3, x3, x4, w4, MIRROR_CLASS_COMPONENT_TYPE_OFFSET, 1 // Heap reference = 32b
   1408     // This also zero-extends to x3
   1409     READ_BARRIER x4, w4, x2, x4, w4, MIRROR_OBJECT_CLASS_OFFSET, 2  // Heap reference = 32b
   1410                                                                     // This also zero-extends to x4
   1411     cmp w3, w4  // value's type == array's component type - trivial assignability
   1412     bne .Lcheck_assignability
   1413 .Ldo_aput:
   1414     add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
   1415                                                          // "Compress" = do nothing
   1416     POISON_HEAP_REF w2
   1417     str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
   1418     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    1419     lsr x0, x0, #7                                       // Compute the card index.
    1420     strb w3, [x3, x0]                                    // Mark the card dirty.
   1421     ret
   1422 .Ldo_aput_null:
   1423     add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
   1424                                                          // "Compress" = do nothing
   1425     str w2, [x3, x1, lsl #2]                             // Heap reference = 32b
   1426     ret
   1427 .Lcheck_assignability:
   1428     // Store arguments and link register
   1429     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
   1430     SAVE_TWO_REGS x2, xLR, 16
   1431 
   1432     // Call runtime code
   1433     mov x0, x3              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
   1434     mov x1, x4              // Heap reference, 32b, "uncompress" = do nothing, already zero-extended
   1435     bl artIsAssignableFromCode
   1436 
   1437     // Check for exception
   1438     cbz x0, .Lthrow_array_store_exception
   1439 
   1440     // Restore
   1441     .cfi_remember_state
   1442     RESTORE_TWO_REGS x2, xLR, 16
   1443     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
   1444 
   1445     add x3, x0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
   1446                                                           // "Compress" = do nothing
   1447     POISON_HEAP_REF w2
   1448     str w2, [x3, x1, lsl #2]                              // Heap reference = 32b
   1449     ldr x3, [xSELF, #THREAD_CARD_TABLE_OFFSET]
    1450     lsr x0, x0, #7                                        // Compute the card index.
    1451     strb w3, [x3, x0]                                     // Mark the card dirty.
   1452     ret
   1453     .cfi_restore_state            // Reset unwind info so following code unwinds.
   1454     .cfi_def_cfa_offset 32        // workaround for clang bug: 31975598
   1455 .Lthrow_array_store_exception:
   1456     RESTORE_TWO_REGS x2, xLR, 16
   1457     RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 32
   1458 
   1459     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
   1460     mov x1, x2                      // Pass value.
   1461     mov x2, xSELF                   // Pass Thread::Current.
   1462     bl artThrowArrayStoreException  // (Object*, Object*, Thread*).
   1463     brk 0                           // Unreached.
   1464 END art_quick_aput_obj
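
             /*
              * Roughly, the stub above corresponds to this C sketch (types and helpers are
              * illustrative; artIsAssignableFromCode and artThrowArrayStoreException are the real
              * runtime entrypoints):
              *
              *   void AputObj(Array* arr, int32_t idx, Object* val) {   // x0, x1, x2
              *     if (val != NULL) {
              *       Class* comp = ReadBarrier(ReadBarrier(arr, kClassOff), kComponentTypeOff);
              *       Class* type = ReadBarrier(val, kClassOff);
              *       if (comp != type && !artIsAssignableFromCode(comp, type))
              *         artThrowArrayStoreException(arr, val, self);     // does not return
              *     }
              *     arr->data[idx] = PoisonHeapRef(val);                 // 32-bit reference store
              *     if (val != NULL)                                     // dirty the card for the GC
              *       self->card_table[(uintptr_t)arr >> 7] = kCardDirty;
              *   }
              */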
   1465 
   1466 // Macro to facilitate adding new allocation entrypoints.
   1467 .macro ONE_ARG_DOWNCALL name, entrypoint, return
   1468     .extern \entrypoint
   1469 ENTRY \name
   1470     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1471     mov    x1, xSELF                  // pass Thread::Current
   1472     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
   1473     RESTORE_SAVE_REFS_ONLY_FRAME
   1474     \return
   1475 END \name
   1476 .endm
   1477 
   1478 // Macro to facilitate adding new allocation entrypoints.
   1479 .macro TWO_ARG_DOWNCALL name, entrypoint, return
   1480     .extern \entrypoint
   1481 ENTRY \name
   1482     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1483     mov    x2, xSELF                  // pass Thread::Current
   1484     bl     \entrypoint                // (uint32_t type_idx, Method* method, Thread*)
   1485     RESTORE_SAVE_REFS_ONLY_FRAME
   1486     \return
   1487 END \name
   1488 .endm
   1489 
   1490 // Macro to facilitate adding new allocation entrypoints.
   1491 .macro THREE_ARG_DOWNCALL name, entrypoint, return
   1492     .extern \entrypoint
   1493 ENTRY \name
   1494     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1495     mov    x3, xSELF                  // pass Thread::Current
   1496     bl     \entrypoint
   1497     RESTORE_SAVE_REFS_ONLY_FRAME
   1498     \return
   1499 END \name
   1500 .endm
   1501 
   1502 // Macro to facilitate adding new allocation entrypoints.
   1503 .macro FOUR_ARG_DOWNCALL name, entrypoint, return
   1504     .extern \entrypoint
   1505 ENTRY \name
   1506     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1507     mov    x4, xSELF                  // pass Thread::Current
    1508     bl     \entrypoint
   1509     RESTORE_SAVE_REFS_ONLY_FRAME
   1510     \return
   1511     DELIVER_PENDING_EXCEPTION
   1512 END \name
   1513 .endm
   1514 
    1515 // Macros that exploit the code similarities between the downcalls.
   1516 .macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
   1517     .extern \entrypoint
   1518 ENTRY \name
   1519     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1520     mov    x1, xSELF                  // pass Thread::Current
   1521     bl     \entrypoint                // (uint32_t type_idx, Thread*)
   1522     RESTORE_SAVE_REFS_ONLY_FRAME
   1523     \return
   1524 END \name
   1525 .endm
   1526 
   1527 .macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
   1528     .extern \entrypoint
   1529 ENTRY \name
   1530     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1531     mov    x2, xSELF                  // pass Thread::Current
   1532     bl     \entrypoint
   1533     RESTORE_SAVE_REFS_ONLY_FRAME
   1534     \return
   1535 END \name
   1536 .endm
   1537 
   1538 .macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
   1539     .extern \entrypoint
   1540 ENTRY \name
   1541     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1542     mov    x3, xSELF                  // pass Thread::Current
   1543     bl     \entrypoint
   1544     RESTORE_SAVE_REFS_ONLY_FRAME
   1545     \return
   1546 END \name
   1547 .endm
   1548 
   1549 // Macro for string and type resolution and initialization.
   1550 .macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint
   1551     .extern \entrypoint
   1552 ENTRY \name
   1553     SETUP_SAVE_EVERYTHING_FRAME       // save everything for stack crawl
   1554     mov   x1, xSELF                   // pass Thread::Current
   1555     bl    \entrypoint                 // (int32_t index, Thread* self)
   1556     cbz   w0, 1f                      // If result is null, deliver the OOME.
   1557     .cfi_remember_state
   1558     RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
   1559     ret                        // return
   1560     .cfi_restore_state
   1561     .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING  // workaround for clang bug: 31975598
   1562 1:
   1563     DELIVER_PENDING_EXCEPTION_FRAME_READY
   1564 END \name
   1565 .endm
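
             /*
              * Each stub generated by this macro is effectively (C sketch, illustrative names):
              *
              *   result = entrypoint(index, self);  // runs under a SaveEverything frame, so a
              *   if (result == NULL)                // stack crawl can see every register
              *     DeliverPendingException();
              *   return result;
              */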
   1566 
   1567 .macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1568     cbz w0, 1f                 // result zero branch over
   1569     ret                        // return
   1570 1:
   1571     DELIVER_PENDING_EXCEPTION
   1572 .endm
   1573 
   1574     /*
   1575      * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
   1576      * failure.
   1577      */
   1578 TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1579 
   1580     /*
    1581      * Entry from managed code when static storage is uninitialized; this stub runs the class
    1582      * initializer and delivers an exception on error. On success the static storage base is
    1583      * returned.
   1584      */
   1585 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
   1586 
   1587 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type, artInitializeTypeFromCode
   1588 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
   1589 ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode
   1590 
   1591 ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1592 ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1593 ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1594 ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1595 ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1596 ONE_ARG_REF_DOWNCALL art_quick_get64_static, artGet64StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1597 ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1598 
   1599 TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1600 TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1601 TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1602 TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1603 TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1604 TWO_ARG_REF_DOWNCALL art_quick_get64_instance, artGet64InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1605 TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_X1
   1606 
   1607 TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1608 TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1609 TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1610 TWO_ARG_REF_DOWNCALL art_quick_set64_static, artSet64StaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1611 TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1612 
   1613 THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1614 THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1615 THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1616 THREE_ARG_REF_DOWNCALL art_quick_set64_instance, artSet64InstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1617 THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_W0_IS_ZERO_OR_DELIVER
   1618 
   1619 // Generate the allocation entrypoints for each allocator.
   1620 GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
   1621 // Comment out allocators that have arm64 specific asm.
   1622 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
   1623 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
   1624 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
   1625 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
   1626 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
   1627 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
   1628 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
   1629 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
   1630 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
   1631 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
   1632 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)
   1633 
   1634 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
   1635 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
   1636 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
   1637 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
   1638 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
   1639 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
   1640 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
   1641 // GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
   1642 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
   1643 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
   1644 GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)
   1645 
   1646 .macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name
   1647 ENTRY \c_name
   1648     // Fast path rosalloc allocation.
   1649     // x0: type, xSELF(x19): Thread::Current
   1650     // x1-x7: free.
   1651     ldr    x3, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
   1652                                                               // allocation stack has room.
   1653                                                               // ldp won't work due to large offset.
   1654     ldr    x4, [xSELF, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
   1655     cmp    x3, x4
   1656     bhs    .Lslow_path\c_name
   1657     ldr    w3, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x3)
   1658     cmp    x3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE        // Check if the size is for a thread
   1659                                                               // local allocation. Also does the
   1660                                                               // finalizable and initialization
   1661                                                               // checks.
   1662     bhs    .Lslow_path\c_name
   1663                                                               // Compute the rosalloc bracket index
   1664                                                               // from the size. Since the size is
   1665                                                               // already aligned we can combine the
   1666                                                               // two shifts together.
   1667     add    x4, xSELF, x3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
    1668                                                               // Subtract pointer size since there
   1669                                                               // are no runs for 0 byte allocations
   1670                                                               // and the size is already aligned.
   1671     ldr    x4, [x4, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
   1672                                                               // Load the free list head (x3). This
   1673                                                               // will be the return val.
   1674     ldr    x3, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1675     cbz    x3, .Lslow_path\c_name
   1676     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
   1677     ldr    x1, [x3, #ROSALLOC_SLOT_NEXT_OFFSET]               // Load the next pointer of the head
   1678                                                               // and update the list head with the
   1679                                                               // next pointer.
   1680     str    x1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
   1681                                                               // Store the class pointer in the
   1682                                                               // header. This also overwrites the
   1683                                                               // next pointer. The offsets are
   1684                                                               // asserted to match.
   1685 #if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
   1686 #error "Class pointer needs to overwrite next pointer."
   1687 #endif
   1688     POISON_HEAP_REF w0
   1689     str    w0, [x3, #MIRROR_OBJECT_CLASS_OFFSET]
   1690                                                               // Fence. This is "ish" not "ishst" so
   1691                                                               // that it also ensures ordering of
   1692                                                               // the object size load with respect
   1693                                                               // to later accesses to the class
   1694                                                               // object. Alternatively we could use
   1695                                                               // "ishst" if we use load-acquire for
   1696                                                               // the class status load.
   1697                                                               // Needs to be done before pushing on
   1698                                                               // allocation since Heap::VisitObjects
   1699                                                               // relies on seeing the class pointer.
   1700                                                               // b/28790624
   1701     dmb    ish
   1702                                                               // Push the new object onto the thread
   1703                                                               // local allocation stack and
   1704                                                               // increment the thread local
   1705                                                               // allocation stack top.
   1706     ldr    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1707     str    w3, [x1], #COMPRESSED_REFERENCE_SIZE               // (Increment x1 as a side effect.)
   1708     str    x1, [xSELF, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
   1709                                                               // Decrement the size of the free list
   1710     ldr    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1711     sub    x1, x1, #1
   1712                                                               // TODO: consider combining this store
   1713                                                               // and the list head store above using
   1714                                                               // strd.
   1715     str    w1, [x4, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
   1716 
   1717     mov    x0, x3                                             // Set the return value and return.
   1718     ret
   1719 .Lslow_path\c_name:
   1720     SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
   1721     mov    x1, xSELF                                // pass Thread::Current
   1722     bl     \cxx_name
   1723     RESTORE_SAVE_REFS_ONLY_FRAME
   1724     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1725 END \c_name
   1726 .endm
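
             /*
              * A C sketch of the RosAlloc fast path above (all names illustrative):
              *
              *   Object* AllocRosAlloc(Class* klass, Thread* self) {        // x0, xSELF
              *     if (self->alloc_stack_top >= self->alloc_stack_end)
              *       return SlowPath(klass, self);
              *     uint32_t size = klass->object_size_alloc_fast_path;      // also encodes the
              *     if (size >= kMaxThreadLocalBracketSize)                  // finalizable and
              *       return SlowPath(klass, self);                          // initialization checks
              *     Run* run = self->rosalloc_runs[(size >> kBracketQuantumShift) - 1];
              *     Slot* slot = run->free_list.head;
              *     if (slot == NULL) return SlowPath(klass, self);
              *     run->free_list.head = slot->next;
              *     slot->klass = klass;         // Overwrites `next`; the offsets are assert-matched.
              *     dmb_ish();                   // Publish the class pointer first (b/28790624).
              *     *self->alloc_stack_top++ = CompressRef(slot);
              *     run->free_list.size--;
              *     return (Object*)slot;
              *   }
              */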
   1727 
   1728 ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc
   1729 ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc
   1730 
   1731 .macro ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED slowPathLabel
   1732     ldr    x4, [xSELF, #THREAD_LOCAL_POS_OFFSET]
   1733     ldr    x5, [xSELF, #THREAD_LOCAL_END_OFFSET]
   1734     ldr    w7, [x0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (x7).
   1735     add    x6, x4, x7                                         // Add object size to tlab pos.
   1736     cmp    x6, x5                                             // Check if it fits, overflow works
   1737                                                               // since the tlab pos and end are 32
   1738                                                               // bit values.
   1739     bhi    \slowPathLabel
   1740     str    x6, [xSELF, #THREAD_LOCAL_POS_OFFSET]              // Store new thread_local_pos.
   1741     ldr    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]          // Increment thread_local_objects.
   1742     add    x5, x5, #1
   1743     str    x5, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
   1744     POISON_HEAP_REF w0
   1745     str    w0, [x4, #MIRROR_OBJECT_CLASS_OFFSET]              // Store the class pointer.
   1746                                                               // Fence. This is "ish" not "ishst" so
   1747                                                               // that the code after this allocation
   1748                                                               // site will see the right values in
   1749                                                               // the fields of the class.
   1750                                                               // Alternatively we could use "ishst"
   1751                                                               // if we use load-acquire for the
    1752                                                               // object size load.
   1753     mov    x0, x4
   1754     dmb    ish
   1755     ret
   1756 .endm
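
             /*
              * The fast path above is a classic bump-pointer allocation; in C (illustrative names):
              *
              *   Object* AllocTlab(Class* klass, Thread* self) {
              *     uintptr_t pos = self->tlab_pos;
              *     uint32_t size = klass->object_size_alloc_fast_path;
              *     if (pos + size > self->tlab_end) return SlowPath(klass, self);
              *     self->tlab_pos = pos + size;
              *     self->tlab_objects++;
              *     Object* obj = (Object*)pos;
              *     obj->klass = PoisonHeapRef(klass);  // 32-bit class pointer store
              *     dmb_ish();                          // class must be visible before obj escapes
              *     return obj;
              *   }
              */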
   1757 
   1758 // The common code for art_quick_alloc_object_*region_tlab
   1759 .macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint
   1760 ENTRY \name
   1761     // Fast path region tlab allocation.
   1762     // x0: type, xSELF(x19): Thread::Current
   1763     // x1-x7: free.
   1764     ALLOC_OBJECT_TLAB_FAST_PATH_RESOLVED .Lslow_path\name
   1765 .Lslow_path\name:
   1766     SETUP_SAVE_REFS_ONLY_FRAME                 // Save callee saves in case of GC.
   1767     mov    x1, xSELF                           // Pass Thread::Current.
   1768     bl     \entrypoint                         // (mirror::Class*, Thread*)
   1769     RESTORE_SAVE_REFS_ONLY_FRAME
   1770     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1771 END \name
   1772 .endm
   1773 
   1774 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB
   1775 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB
   1776 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB
   1777 GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB
   1778 
   1779 .macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel, xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
    1780     and    \xTemp1, \xTemp1, #OBJECT_ALIGNMENT_MASK_TOGGLED64 // Apply alignment mask
   1781                                                               // (addr + 7) & ~7. The mask must
   1782                                                               // be 64 bits to keep high bits in
   1783                                                               // case of overflow.
   1784     // Negative sized arrays are handled here since xCount holds a zero extended 32 bit value.
   1785     // Negative ints become large 64 bit unsigned ints which will always be larger than max signed
    1786     // 32 bit int. Since the max shift for arrays is 3, it cannot become a negative 64 bit int.
   1787     cmp    \xTemp1, #MIN_LARGE_OBJECT_THRESHOLD               // Possibly a large object, go slow
   1788     bhs    \slowPathLabel                                     // path.
   1789 
   1790     ldr    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Check tlab for space, note that
   1791                                                               // we use (end - begin) to handle
   1792                                                               // negative size arrays. It is
   1793                                                               // assumed that a negative size will
   1794                                                               // always be greater unsigned than
   1795                                                               // region size.
   1796     ldr    \xTemp2, [xSELF, #THREAD_LOCAL_END_OFFSET]
   1797     sub    \xTemp2, \xTemp2, \xTemp0
   1798     cmp    \xTemp1, \xTemp2
   1799     bhi    \slowPathLabel
   1800     // "Point of no slow path". Won't go to the slow path from here on. OK to clobber x0 and x1.
   1801                                                               // Move old thread_local_pos to x0
   1802                                                               // for the return value.
   1803     mov    x0, \xTemp0
   1804     add    \xTemp0, \xTemp0, \xTemp1
   1805     str    \xTemp0, [xSELF, #THREAD_LOCAL_POS_OFFSET]         // Store new thread_local_pos.
   1806     ldr    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]     // Increment thread_local_objects.
   1807     add    \xTemp0, \xTemp0, #1
   1808     str    \xTemp0, [xSELF, #THREAD_LOCAL_OBJECTS_OFFSET]
   1809     POISON_HEAP_REF \wClass
   1810     str    \wClass, [x0, #MIRROR_OBJECT_CLASS_OFFSET]         // Store the class pointer.
   1811     str    \wCount, [x0, #MIRROR_ARRAY_LENGTH_OFFSET]         // Store the array length.
   1812                                                               // Fence.
   1813     dmb    ishst
   1814     ret
   1815 .endm
   1816 
   1817 .macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
   1818 ENTRY \name
   1819     // Fast path array allocation for region tlab allocation.
   1820     // x0: mirror::Class* type
   1821     // x1: int32_t component_count
   1822     // x2-x7: free.
   1823     mov    x3, x0
   1824     \size_setup x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
   1825     ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name, x3, w3, x1, w1, x4, w4, x5, w5, x6, w6
   1826 .Lslow_path\name:
   1827     // x0: mirror::Class* klass
   1828     // x1: int32_t component_count
   1829     // x2: Thread* self
   1830     SETUP_SAVE_REFS_ONLY_FRAME        // save callee saves in case of GC
   1831     mov    x2, xSELF                  // pass Thread::Current
   1832     bl     \entrypoint
   1833     RESTORE_SAVE_REFS_ONLY_FRAME
   1834     RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
   1835 END \name
   1836 .endm
   1837 
   1838 .macro COMPUTE_ARRAY_SIZE_UNKNOWN xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
   1839     // Array classes are never finalizable or uninitialized, no need to check.
   1840     ldr    \wTemp0, [\xClass, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET] // Load component type
   1841     UNPOISON_HEAP_REF \wTemp0
   1842     ldr    \wTemp0, [\xTemp0, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
   1843     lsr    \xTemp0, \xTemp0, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT // Component size shift is in high 16
   1844                                                               // bits.
   1845                                                               // xCount is holding a 32 bit value,
    1846                                                               // it cannot overflow.
   1847     lsl    \xTemp1, \xCount, \xTemp0                          // Calculate data size
   1848     // Add array data offset and alignment.
   1849     add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1850 #if MIRROR_LONG_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
   1851 #error Long array data offset must be 4 greater than int array data offset.
   1852 #endif
   1853 
   1854     add    \xTemp0, \xTemp0, #1                               // Add 4 to the length only if the
   1855                                                               // component size shift is 3
   1856                                                               // (for 64 bit alignment).
   1857     and    \xTemp0, \xTemp0, #4
   1858     add    \xTemp1, \xTemp1, \xTemp0
   1859 .endm
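
             /*
              * In C, the computation above is roughly (illustrative names; the final round-down
              * is the `and` at the top of ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE):
              *
              *   uint64_t shift = component_type->primitive_type >> kSizeShiftShift;
              *   uint64_t size = ((uint64_t)(uint32_t)count << shift)   // data bytes; count is
              *                   + kIntArrayDataOffset                  // 32-bit, so no overflow
              *                   + kObjectAlignmentMask;
              *   size += (shift + 1) & 4;                  // 64-bit elements: data offset is 4 larger
              *   size &= ~(uint64_t)kObjectAlignmentMask;  // caller rounds down to object alignment
              */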
   1860 
   1861 .macro COMPUTE_ARRAY_SIZE_8 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
   1862     // Add array data offset and alignment.
   1863     add    \xTemp1, \xCount, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1864 .endm
   1865 
   1866 .macro COMPUTE_ARRAY_SIZE_16 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
   1867     lsl    \xTemp1, \xCount, #1
   1868     // Add array data offset and alignment.
   1869     add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1870 .endm
   1871 
   1872 .macro COMPUTE_ARRAY_SIZE_32 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
   1873     lsl    \xTemp1, \xCount, #2
   1874     // Add array data offset and alignment.
   1875     add    \xTemp1, \xTemp1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1876 .endm
   1877 
   1878 .macro COMPUTE_ARRAY_SIZE_64 xClass, wClass, xCount, wCount, xTemp0, wTemp0, xTemp1, wTemp1, xTemp2, wTemp2
   1879     lsl    \xTemp1, \xCount, #3
   1880     // Add array data offset and alignment.
   1881     add    \xTemp1, \xTemp1, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
   1882 .endm
   1883 
   1884 # TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm64, remove
   1885 # the entrypoint once all backends have been updated to use the size variants.
   1886 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1887 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
   1888 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
   1889 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
   1890 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
   1891 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
   1892 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
   1893 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
   1894 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
   1895 GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64
   1896 
   1897     /*
   1898      * Called by managed code when the thread has been asked to suspend.
   1899      */
   1900     .extern artTestSuspendFromCode
   1901 ENTRY art_quick_test_suspend
   1902     SETUP_SAVE_EVERYTHING_FRAME               // save callee saves for stack crawl
   1903     mov    x0, xSELF
   1904     bl     artTestSuspendFromCode             // (Thread*)
   1905     RESTORE_SAVE_EVERYTHING_FRAME
   1906     ret
   1907 END art_quick_test_suspend
   1908 
   1909 ENTRY art_quick_implicit_suspend
   1910     mov    x0, xSELF
   1911     SETUP_SAVE_REFS_ONLY_FRAME                // save callee saves for stack crawl
   1912     bl     artTestSuspendFromCode             // (Thread*)
   1913     RESTORE_SAVE_REFS_ONLY_FRAME_AND_RETURN
   1914 END art_quick_implicit_suspend
   1915 
   1916      /*
   1917      * Called by managed code that is attempting to call a method on a proxy class. On entry
    1918      * x0 holds the proxy method and x1 holds the receiver. The frame size of the invoked proxy
   1919      * method agrees with a ref and args callee save frame.
   1920      */
   1921      .extern artQuickProxyInvokeHandler
   1922 ENTRY art_quick_proxy_invoke_handler
   1923     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
   1924     mov     x2, xSELF                   // pass Thread::Current
   1925     mov     x3, sp                      // pass SP
   1926     bl      artQuickProxyInvokeHandler  // (Method* proxy method, receiver, Thread*, SP)
   1927     ldr     x2, [xSELF, THREAD_EXCEPTION_OFFSET]
   1928     cbnz    x2, .Lexception_in_proxy    // success if no exception is pending
   1929     RESTORE_SAVE_REFS_AND_ARGS_FRAME    // Restore frame
   1930     fmov    d0, x0                      // Store result in d0 in case it was float or double
   1931     ret                                 // return on success
   1932 .Lexception_in_proxy:
   1933     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1934     DELIVER_PENDING_EXCEPTION
   1935 END art_quick_proxy_invoke_handler
   1936 
   1937     /*
   1938      * Called to resolve an imt conflict.
   1939      * x0 is the conflict ArtMethod.
   1940      * xIP1 is a hidden argument that holds the target interface method's dex method index.
   1941      *
   1942      * Note that this stub writes to xIP0, xIP1, and x0.
   1943      */
   1944     .extern artInvokeInterfaceTrampoline
   1945 ENTRY art_quick_imt_conflict_trampoline
   1946     ldr xIP0, [sp, #0]  // Load referrer
   1947     ldr xIP0, [xIP0, #ART_METHOD_DEX_CACHE_METHODS_OFFSET_64]   // Load dex cache methods array
   1948     ldr xIP0, [xIP0, xIP1, lsl #POINTER_SIZE_SHIFT]  // Load interface method
   1949     ldr xIP1, [x0, #ART_METHOD_JNI_OFFSET_64]  // Load ImtConflictTable
   1950     ldr x0, [xIP1]  // Load first entry in ImtConflictTable.
   1951 .Limt_table_iterate:
   1952     cmp x0, xIP0
   1953     // Branch if found. Benchmarks have shown doing a branch here is better.
   1954     beq .Limt_table_found
   1955     // If the entry is null, the interface method is not in the ImtConflictTable.
   1956     cbz x0, .Lconflict_trampoline
   1957     // Iterate over the entries of the ImtConflictTable.
   1958     ldr x0, [xIP1, #(2 * __SIZEOF_POINTER__)]!
   1959     b .Limt_table_iterate
   1960 .Limt_table_found:
   1961     // We successfully hit an entry in the table. Load the target method
   1962     // and jump to it.
   1963     ldr x0, [xIP1, #__SIZEOF_POINTER__]
   1964     ldr xIP0, [x0, #ART_METHOD_QUICK_CODE_OFFSET_64]
   1965     br xIP0
   1966 .Lconflict_trampoline:
   1967     // Call the runtime stub to populate the ImtConflictTable and jump to the
   1968     // resolved method.
   1969     mov x0, xIP0  // Load interface method
   1970     INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
   1971 END art_quick_imt_conflict_trampoline
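
             /*
              * The table walk above, roughly in C (illustrative types; the ImtConflictTable is an
              * array of [interface_method, implementation] pointer pairs, terminated by a null
              * interface method):
              *
              *   void* ImtConflict(ArtMethod* conflict, uint32_t method_index, ArtMethod** sp) {
              *     ArtMethod* iface = (*sp)->dex_cache_methods[method_index];   // index from xIP1
              *     Entry* e = conflict->jni_entrypoint;                         // field reused as table
              *     for (;; e++) {
              *       if (e->interface_method == iface)
              *         return e->implementation->quick_code;                    // tail-called via br
              *       if (e->interface_method == NULL)
              *         return artInvokeInterfaceTrampoline(iface, self, sp);    // populate the table
              *     }
              *   }
              */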
   1972 
   1973 ENTRY art_quick_resolution_trampoline
   1974     SETUP_SAVE_REFS_AND_ARGS_FRAME
   1975     mov x2, xSELF
   1976     mov x3, sp
   1977     bl artQuickResolutionTrampoline  // (called, receiver, Thread*, SP)
   1978     cbz x0, 1f
   1979     mov xIP0, x0            // Remember returned code pointer in xIP0.
   1980     ldr x0, [sp, #0]        // artQuickResolutionTrampoline puts called method in *SP.
   1981     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1982     br xIP0
   1983 1:
   1984     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   1985     DELIVER_PENDING_EXCEPTION
   1986 END art_quick_resolution_trampoline
   1987 
   1988 /*
   1989  * Generic JNI frame layout:
   1990  *
   1991  * #-------------------#
   1992  * |                   |
   1993  * | caller method...  |
   1994  * #-------------------#    <--- SP on entry
   1995  * | Return X30/LR     |
   1996  * | X29/FP            |    callee save
   1997  * | X28               |    callee save
   1998  * | X27               |    callee save
   1999  * | X26               |    callee save
   2000  * | X25               |    callee save
   2001  * | X24               |    callee save
   2002  * | X23               |    callee save
   2003  * | X22               |    callee save
   2004  * | X21               |    callee save
   2005  * | X20               |    callee save
   2006  * | X19               |    callee save
   2007  * | X7                |    arg7
   2008  * | X6                |    arg6
   2009  * | X5                |    arg5
   2010  * | X4                |    arg4
   2011  * | X3                |    arg3
   2012  * | X2                |    arg2
   2013  * | X1                |    arg1
   2014  * | D7                |    float arg 8
   2015  * | D6                |    float arg 7
   2016  * | D5                |    float arg 6
   2017  * | D4                |    float arg 5
   2018  * | D3                |    float arg 4
   2019  * | D2                |    float arg 3
   2020  * | D1                |    float arg 2
   2021  * | D0                |    float arg 1
   2022  * | Method*           | <- X0
   2023  * #-------------------#
   2024  * | local ref cookie  | // 4B
   2025  * | handle scope size | // 4B
   2026  * #-------------------#
   2027  * | JNI Call Stack    |
   2028  * #-------------------#    <--- SP on native call
   2029  * |                   |
   2030  * | Stack for Regs    |    The trampoline assembly will pop these values
   2031  * |                   |    into registers for native call
   2032  * #-------------------#
   2033  * | Native code ptr   |
   2034  * #-------------------#
   2035  * | Free scratch      |
   2036  * #-------------------#
   2037  * | Ptr to (1)        |    <--- SP
   2038  * #-------------------#
   2039  */
   2040     /*
   2041      * Called to do a generic JNI down-call
   2042      */
   2043 ENTRY art_quick_generic_jni_trampoline
   2044     SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_X0
   2045 
    2046     // Save SP, so we can have static CFI info.
   2047     mov x28, sp
   2048     .cfi_def_cfa_register x28
   2049 
   2050     // This looks the same, but is different: this will be updated to point to the bottom
   2051     // of the frame when the handle scope is inserted.
   2052     mov xFP, sp
   2053 
    2054     mov xIP0, #5120                  // Reserve scratch space for the runtime to build the JNI frame in.
   2055     sub sp, sp, xIP0
   2056 
   2057     // prepare for artQuickGenericJniTrampoline call
   2058     // (Thread*,  SP)
   2059     //    x0      x1   <= C calling convention
   2060     //   xSELF    xFP  <= where they are
   2061 
   2062     mov x0, xSELF   // Thread*
   2063     mov x1, xFP
   2064     bl artQuickGenericJniTrampoline  // (Thread*, sp)
   2065 
   2066     // The C call will have registered the complete save-frame on success.
   2067     // The result of the call is:
   2068     // x0: pointer to native code, 0 on error.
   2069     // x1: pointer to the bottom of the used area of the alloca, can restore stack till there.
   2070 
   2071     // Check for error = 0.
   2072     cbz x0, .Lexception_in_native
   2073 
   2074     // Release part of the alloca.
   2075     mov sp, x1
   2076 
   2077     // Save the code pointer
   2078     mov xIP0, x0
   2079 
   2080     // Load parameters from frame into registers.
   2081     // TODO Check with artQuickGenericJniTrampoline.
    2082     //      Also, check against AAPCS64 - the stack arguments are interleaved.
   2083     ldp x0, x1, [sp]
   2084     ldp x2, x3, [sp, #16]
   2085     ldp x4, x5, [sp, #32]
   2086     ldp x6, x7, [sp, #48]
   2087 
   2088     ldp d0, d1, [sp, #64]
   2089     ldp d2, d3, [sp, #80]
   2090     ldp d4, d5, [sp, #96]
   2091     ldp d6, d7, [sp, #112]
   2092 
   2093     add sp, sp, #128
   2094 
   2095     blr xIP0        // native call.
   2096 
   2097     // result sign extension is handled in C code
   2098     // prepare for artQuickGenericJniEndTrampoline call
   2099     // (Thread*, result, result_f)
   2100     //    x0       x1       x2        <= C calling convention
   2101     mov x1, x0      // Result (from saved).
   2102     mov x0, xSELF   // Thread register.
   2103     fmov x2, d0     // d0 will contain floating point result, but needs to go into x2
   2104 
   2105     bl artQuickGenericJniEndTrampoline
   2106 
   2107     // Pending exceptions possible.
   2108     ldr x2, [xSELF, THREAD_EXCEPTION_OFFSET]
   2109     cbnz x2, .Lexception_in_native
   2110 
   2111     // Tear down the alloca.
   2112     mov sp, x28
   2113     .cfi_def_cfa_register sp
   2114 
   2115     // Tear down the callee-save frame.
   2116     RESTORE_SAVE_REFS_AND_ARGS_FRAME
   2117 
    2118     // Store the result in d0 too, for the case of a floating-point return.
   2119     fmov d0, x0
   2120     ret
   2121 
   2122 .Lexception_in_native:
   2123     // Move to x1 then sp to please assembler.
    2124     ldr x1, [xSELF, #THREAD_TOP_QUICK_FRAME_OFFSET]
   2125     mov sp, x1
   2126     .cfi_def_cfa_register sp
   2127     # This will create a new save-all frame, required by the runtime.
   2128     DELIVER_PENDING_EXCEPTION
   2129 END art_quick_generic_jni_trampoline
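
             /*
              * End to end, the trampoline above does roughly the following (the two
              * artQuickGenericJni* calls are the real runtime entrypoints, per the comments above):
              *
              *   1. Build a SaveRefsAndArgs frame and reserve a large scratch area on the stack.
              *   2. artQuickGenericJniTrampoline(self, sp) constructs the handle scope and the
              *      native call frame, returning the native code pointer (x0) and the new stack
              *      top (x1); x0 == 0 means an exception is pending.
              *   3. Pop the register arguments (x0-x7, d0-d7) that the runtime laid out at the new
              *      stack top, then blr to the native code.
              *   4. artQuickGenericJniEndTrampoline(self, result, result_f) unwraps JNI references
              *      and returns the final result, which is also mirrored into d0.
              */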
   2130 
   2131 /*
   2132  * Called to bridge from the quick to interpreter ABI. On entry the arguments match those
   2133  * of a quick call:
   2134  * x0 = method being called/to bridge to.
   2135  * x1..x7, d0..d7 = arguments to that method.
   2136  */
   2137 ENTRY art_quick_to_interpreter_bridge
   2138     SETUP_SAVE_REFS_AND_ARGS_FRAME         // Set up frame and save arguments.
   2139 
   2140     //  x0 will contain mirror::ArtMethod* method.
    2141     mov x1, xSELF                          // Pass Thread::Current().
   2142     mov x2, sp
   2143 
   2144     // uint64_t artQuickToInterpreterBridge(mirror::ArtMethod* method, Thread* self,
   2145     //                                      mirror::ArtMethod** sp)
   2146     bl   artQuickToInterpreterBridge
   2147 
   2148     RESTORE_SAVE_REFS_AND_ARGS_FRAME       // TODO: no need to restore arguments in this case.
   2149 
   2150     fmov d0, x0
   2151 
   2152     RETURN_OR_DELIVER_PENDING_EXCEPTION
   2153 END art_quick_to_interpreter_bridge
   2154 
   2155 /*
   2156  * Called to attempt to execute an obsolete method.
   2157  */
   2158 ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod
   2159 
   2160 
   2161 //
   2162 // Instrumentation-related stubs
   2163 //
   2164     .extern artInstrumentationMethodEntryFromCode
   2165 ENTRY art_quick_instrumentation_entry
   2166     SETUP_SAVE_REFS_AND_ARGS_FRAME
   2167 
   2168     mov   x20, x0             // Preserve method reference in a callee-save.
   2169 
   2170     mov   x2, xSELF
   2171     mov   x3, xLR
   2172     bl    artInstrumentationMethodEntryFromCode  // (Method*, Object*, Thread*, LR)
   2173 
   2174     mov   xIP0, x0            // x0 = result of call.
   2175     mov   x0, x20             // Reload method reference.
   2176 
   2177     RESTORE_SAVE_REFS_AND_ARGS_FRAME  // Note: will restore xSELF
   2178     adr   xLR, art_quick_instrumentation_exit
   2179     br    xIP0                // Tail-call method with lr set to art_quick_instrumentation_exit.
   2180 END art_quick_instrumentation_entry
   2181 
   2182     .extern artInstrumentationMethodExitFromCode
   2183 ENTRY art_quick_instrumentation_exit
   2184     mov   xLR, #0             // Clobber LR for later checks.
   2185 
   2186     SETUP_SAVE_REFS_ONLY_FRAME
   2187 
    2188     // We need to save x0 and d0. We could use a callee-save from SETUP_SAVE_REFS_ONLY_FRAME, but then
   2189     // we would need to fully restore it. As there are a lot of callee-save registers, it seems
   2190     // easier to have an extra small stack area.
   2191 
   2192     str x0, [sp, #-16]!       // Save integer result.
   2193     .cfi_adjust_cfa_offset 16
   2194     str d0,  [sp, #8]         // Save floating-point result.
   2195 
   2196     add   x1, sp, #16         // Pass SP.
   2197     mov   x2, x0              // Pass integer result.
   2198     fmov  x3, d0              // Pass floating-point result.
   2199     mov   x0, xSELF           // Pass Thread.
   2200     bl   artInstrumentationMethodExitFromCode    // (Thread*, SP, gpr_res, fpr_res)
   2201 
   2202     mov   xIP0, x0            // Return address from instrumentation call.
    2203     mov   xLR, x1             // x1 holds the link register if we're to bounce to deoptimize.
   2204 
   2205     ldr   d0, [sp, #8]        // Restore floating-point result.
   2206     ldr   x0, [sp], #16       // Restore integer result, and drop stack area.
    2207     .cfi_adjust_cfa_offset -16
   2208 
   2209     POP_SAVE_REFS_ONLY_FRAME
   2210 
   2211     br    xIP0                // Tail-call out.
   2212 END art_quick_instrumentation_exit
   2213 
   2214     /*
   2215      * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
   2216      * will long jump to the upcall with a special exception of -1.
   2217      */
   2218     .extern artDeoptimize
   2219 ENTRY art_quick_deoptimize
   2220     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
   2221     mov    x0, xSELF          // Pass thread.
   2222     bl     artDeoptimize      // (Thread*)
   2223     brk 0
   2224 END art_quick_deoptimize
   2225 
   2226     /*
   2227      * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
   2228      * will long jump to the upcall with a special exception of -1.
   2229      */
   2230     .extern artDeoptimizeFromCompiledCode
   2231 ENTRY art_quick_deoptimize_from_compiled_code
   2232     SETUP_SAVE_EVERYTHING_FRAME
   2233     mov    x1, xSELF                      // Pass thread.
   2234     bl     artDeoptimizeFromCompiledCode  // (DeoptimizationKind, Thread*)
   2235     brk 0
   2236 END art_quick_deoptimize_from_compiled_code
   2237 
   2238 
   2239     /*
   2240      * String's indexOf.
   2241      *
   2242      * TODO: Not very optimized.
   2243      * On entry:
   2244      *    x0:   string object (known non-null)
   2245      *    w1:   char to match (known <= 0xFFFF)
   2246      *    w2:   Starting offset in string data
   2247      */
   2248 ENTRY art_quick_indexof
   2249 #if (STRING_COMPRESSION_FEATURE)
   2250     ldr   w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
   2251 #else
   2252     ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
   2253 #endif
   2254     add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
   2255 #if (STRING_COMPRESSION_FEATURE)
   2256     /* w4 holds count (with flag) and w3 holds actual length */
   2257     lsr   w3, w4, #1
   2258 #endif
   2259     /* Clamp start to [0..count] */
   2260     cmp   w2, #0
   2261     csel  w2, wzr, w2, lt
   2262     cmp   w2, w3
   2263     csel  w2, w3, w2, gt
   2264 
   2265     /* Save a copy to compute result */
   2266     mov   x5, x0
   2267 
   2268 #if (STRING_COMPRESSION_FEATURE)
   2269     tbz   w4, #0, .Lstring_indexof_compressed
   2270 #endif
   2271     /* Build pointer to start of data to compare and pre-bias */
   2272     add   x0, x0, x2, lsl #1
   2273     sub   x0, x0, #2
   2274     /* Compute iteration count */
   2275     sub   w2, w3, w2
   2276 
   2277     /*
   2278      * At this point we have:
   2279      *  x0: start of the data to test
   2280      *  w1: char to compare
   2281      *  w2: iteration count
   2282      *  x5: original start of string data
   2283      */
   2284 
   2285     subs  w2, w2, #4
   2286     b.lt  .Lindexof_remainder
   2287 
   2288 .Lindexof_loop4:
   2289     ldrh  w6, [x0, #2]!
   2290     ldrh  w7, [x0, #2]!
   2291     ldrh  wIP0, [x0, #2]!
   2292     ldrh  wIP1, [x0, #2]!
   2293     cmp   w6, w1
   2294     b.eq  .Lmatch_0
   2295     cmp   w7, w1
   2296     b.eq  .Lmatch_1
   2297     cmp   wIP0, w1
   2298     b.eq  .Lmatch_2
   2299     cmp   wIP1, w1
   2300     b.eq  .Lmatch_3
   2301     subs  w2, w2, #4
   2302     b.ge  .Lindexof_loop4
   2303 
   2304 .Lindexof_remainder:
   2305     adds  w2, w2, #4
   2306     b.eq  .Lindexof_nomatch
   2307 
   2308 .Lindexof_loop1:
   2309     ldrh  w6, [x0, #2]!
   2310     cmp   w6, w1
   2311     b.eq  .Lmatch_3
   2312     subs  w2, w2, #1
   2313     b.ne  .Lindexof_loop1
   2314 
   2315 .Lindexof_nomatch:
   2316     mov   x0, #-1
   2317     ret
   2318 
   2319 .Lmatch_0:
   2320     sub   x0, x0, #6
   2321     sub   x0, x0, x5
   2322     asr   x0, x0, #1
   2323     ret
   2324 .Lmatch_1:
   2325     sub   x0, x0, #4
   2326     sub   x0, x0, x5
   2327     asr   x0, x0, #1
   2328     ret
   2329 .Lmatch_2:
   2330     sub   x0, x0, #2
   2331     sub   x0, x0, x5
   2332     asr   x0, x0, #1
   2333     ret
   2334 .Lmatch_3:
   2335     sub   x0, x0, x5
   2336     asr   x0, x0, #1
   2337     ret
   2338 #if (STRING_COMPRESSION_FEATURE)
    2339    /*
    2340     * Compare the compressed string character by character with the
    2341     * input character.
    2342     */
   2343 .Lstring_indexof_compressed:
   2344     add   x0, x0, x2
   2345     sub   x0, x0, #1
   2346     sub   w2, w3, w2
   2347 .Lstring_indexof_compressed_loop:
   2348     subs  w2, w2, #1
   2349     b.lt  .Lindexof_nomatch
   2350     ldrb  w6, [x0, #1]!
   2351     cmp   w6, w1
   2352     b.eq  .Lstring_indexof_compressed_matched
   2353     b     .Lstring_indexof_compressed_loop
   2354 .Lstring_indexof_compressed_matched:
   2355     sub   x0, x0, x5
   2356     ret
   2357 #endif
   2358 END art_quick_indexof
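
             /*
              * Equivalent C for the stub above (sketch; assumes STRING_COMPRESSION_FEATURE, where
              * a clear bit 0 of the count field means a compressed, 8-bit string):
              *
              *   int32_t IndexOf(String* s, uint16_t ch, int32_t start) {
              *     int32_t count = s->count;
              *     int32_t len = count >> 1;
              *     if (start < 0) start = 0;
              *     if (start > len) start = len;
              *     if ((count & 1) == 0) {                  // compressed: 8-bit characters
              *       for (int32_t i = start; i < len; i++)
              *         if (s->value8[i] == ch) return i;
              *       return -1;
              *     }
              *     for (int32_t i = start; i < len; i++)    // uncompressed: the asm unrolls this
              *       if (s->value16[i] == ch) return i;     // loop four ways
              *     return -1;
              *   }
              */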
   2359 
   2360     /*
   2361      * Create a function `name` calling the ReadBarrier::Mark routine,
   2362      * getting its argument and returning its result through W register
   2363      * `wreg` (corresponding to X register `xreg`), saving and restoring
   2364      * all caller-save registers.
   2365      *
   2366      * If `wreg` is different from `w0`, the generated function follows a
   2367      * non-standard runtime calling convention:
   2368      * - register `wreg` is used to pass the (sole) argument of this
   2369      *   function (instead of W0);
   2370      * - register `wreg` is used to return the result of this function
   2371      *   (instead of W0);
   2372      * - W0 is treated like a normal (non-argument) caller-save register;
   2373      * - everything else is the same as in the standard runtime calling
   2374      *   convention (e.g. standard callee-save registers are preserved).
   2375      */
   2376 .macro READ_BARRIER_MARK_REG name, wreg, xreg
   2377 ENTRY \name
   2378     // Reference is null, no work to do at all.
   2379     cbz \wreg, .Lret_rb_\name
   2380     // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
   2381     ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
   2382     tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
   2383 .Lret_rb_\name:
   2384     ret
   2385 .Lnot_marked_rb_\name:
    2386     // Check if the top two bits are set; if so, the lock word holds a forwarding address.
   2387     tst   wIP0, wIP0, lsl #1
   2388     bmi   .Lret_forwarding_address\name
   2389 .Lslow_rb_\name:
   2390     /*
   2391      * Allocate 44 stack slots * 8 = 352 bytes:
    2392      * - 20 slots for core registers X0-X15, X17-X19, LR
   2393      * - 24 slots for floating-point registers D0-D7 and D16-D31
   2394      */
   2395     // We must not clobber IP1 since code emitted for HLoadClass and HLoadString
   2396     // relies on IP1 being preserved.
   2397     // Save all potentially live caller-save core registers.
   2398     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
   2399     SAVE_TWO_REGS  x2,  x3, 16
   2400     SAVE_TWO_REGS  x4,  x5, 32
   2401     SAVE_TWO_REGS  x6,  x7, 48
   2402     SAVE_TWO_REGS  x8,  x9, 64
   2403     SAVE_TWO_REGS x10, x11, 80
   2404     SAVE_TWO_REGS x12, x13, 96
   2405     SAVE_TWO_REGS x14, x15, 112
   2406     SAVE_TWO_REGS x17, x18, 128  // Skip x16, i.e. IP0.
   2407     SAVE_TWO_REGS x19, xLR, 144  // Save also return address.
   2408     // Save all potentially live caller-save floating-point registers.
   2409     stp   d0, d1,   [sp, #160]
   2410     stp   d2, d3,   [sp, #176]
   2411     stp   d4, d5,   [sp, #192]
   2412     stp   d6, d7,   [sp, #208]
   2413     stp   d16, d17, [sp, #224]
   2414     stp   d18, d19, [sp, #240]
   2415     stp   d20, d21, [sp, #256]
   2416     stp   d22, d23, [sp, #272]
   2417     stp   d24, d25, [sp, #288]
   2418     stp   d26, d27, [sp, #304]
   2419     stp   d28, d29, [sp, #320]
   2420     stp   d30, d31, [sp, #336]
   2421 
   2422     .ifnc \wreg, w0
   2423       mov   w0, \wreg                   // Pass arg1 - obj from `wreg`
   2424     .endif
   2425     bl    artReadBarrierMark            // artReadBarrierMark(obj)
   2426     .ifnc \wreg, w0
   2427       mov   \wreg, w0                   // Return result into `wreg`
   2428     .endif
   2429 
   2430     // Restore core regs, except `xreg`, as `wreg` is used to return the
   2431     // result of this function (simply remove it from the stack instead).
   2432     POP_REGS_NE x0, x1,   0,   \xreg
   2433     POP_REGS_NE x2, x3,   16,  \xreg
   2434     POP_REGS_NE x4, x5,   32,  \xreg
   2435     POP_REGS_NE x6, x7,   48,  \xreg
   2436     POP_REGS_NE x8, x9,   64,  \xreg
   2437     POP_REGS_NE x10, x11, 80,  \xreg
   2438     POP_REGS_NE x12, x13, 96,  \xreg
   2439     POP_REGS_NE x14, x15, 112, \xreg
   2440     POP_REGS_NE x17, x18, 128, \xreg
   2441     POP_REGS_NE x19, xLR, 144, \xreg  // Restore also return address.
   2442     // Restore floating-point registers.
   2443     ldp   d0, d1,   [sp, #160]
   2444     ldp   d2, d3,   [sp, #176]
   2445     ldp   d4, d5,   [sp, #192]
   2446     ldp   d6, d7,   [sp, #208]
   2447     ldp   d16, d17, [sp, #224]
   2448     ldp   d18, d19, [sp, #240]
   2449     ldp   d20, d21, [sp, #256]
   2450     ldp   d22, d23, [sp, #272]
   2451     ldp   d24, d25, [sp, #288]
   2452     ldp   d26, d27, [sp, #304]
   2453     ldp   d28, d29, [sp, #320]
   2454     ldp   d30, d31, [sp, #336]
   2455     // Remove frame and return.
   2456     DECREASE_FRAME 352
   2457     ret
   2458 .Lret_forwarding_address\name:
   2459     // Shift left by the forwarding address shift. This clears out the state bits since they are
   2460     // in the top 2 bits of the lock word.
   2461     lsl   \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
   2462     ret
   2463 END \name
   2464 .endm
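
             /*
              * A C sketch of each generated mark routine (illustrative names; the lock word layout
              * constants are the ones referenced in the asm):
              *
              *   Object* MarkReg(Object* ref) {
              *     if (ref == NULL) return NULL;
              *     uint32_t lw = ref->monitor;                   // lock word
              *     if (lw & (1u << kMarkBitShift)) return ref;   // already marked
              *     // Top two bits both set <=> forwarding-address state; shifting left drops the
              *     // state bits and recovers the 32-bit forwarded reference.
              *     if ((lw & (lw << 1)) & 0x80000000u)
              *       return (Object*)(uintptr_t)(uint32_t)(lw << kForwardingAddressShift);
              *     return artReadBarrierMark(ref);  // slow path; all caller-saves preserved
              *   }
              */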
   2465 
   2466 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, w0,  x0
   2467 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, w1,  x1
   2468 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, w2,  x2
   2469 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, w3,  x3
   2470 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, w4,  x4
   2471 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, w5,  x5
   2472 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, w6,  x6
   2473 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, w7,  x7
   2474 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, w8,  x8
   2475 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, w9,  x9
   2476 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, w10, x10
   2477 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, w11, x11
   2478 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg12, w12, x12
   2479 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg13, w13, x13
   2480 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg14, w14, x14
   2481 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg15, w15, x15
   2482 // READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg16, w16, x16 ip0 is blocked
   2483 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg17, w17, x17
   2484 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg18, w18, x18
   2485 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg19, w19, x19
   2486 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg20, w20, x20
   2487 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg21, w21, x21
   2488 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg22, w22, x22
   2489 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg23, w23, x23
   2490 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg24, w24, x24
   2491 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg25, w25, x25
   2492 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg26, w26, x26
   2493 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg27, w27, x27
   2494 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg28, w28, x28
   2495 READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg29, w29, x29
   2496 
   2497 
   2498 .macro SELECT_X_OR_W_FOR_MACRO macro_to_use, x, w, xreg
   2499     .if \xreg
   2500       \macro_to_use \x
   2501     .else
   2502       \macro_to_use \w
   2503     .endif
   2504 .endm
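// For example, `SELECT_X_OR_W_FOR_MACRO INTROSPECTION_ARRAY_LOAD, x5, w5, 1`
// expands to `INTROSPECTION_ARRAY_LOAD x5`, while an `xreg` argument of 0
// would expand to `INTROSPECTION_ARRAY_LOAD w5` instead.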

.macro FOR_REGISTERS macro_for_register, macro_for_reserved_register, xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x0, w0, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x1, w1, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x2, w2, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x3, w3, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x4, w4, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x5, w5, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x6, w6, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x7, w7, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x8, w8, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x9, w9, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x10, w10, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x11, w11, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x12, w12, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x13, w13, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x14, w14, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x15, w15, \xreg
    \macro_for_reserved_register  // IP0 is reserved.
    \macro_for_reserved_register  // IP1 is reserved.
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x18, w18, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x19, w19, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x20, w20, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x21, w21, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x22, w22, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x23, w23, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x24, w24, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x25, w25, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x26, w26, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x27, w27, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x28, w28, \xreg
    SELECT_X_OR_W_FOR_MACRO \macro_for_register, x29, w29, \xreg
    \macro_for_reserved_register  // lr is reserved.
    \macro_for_reserved_register  // sp is reserved.
.endm

.macro FOR_XREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 1
.endm

.macro FOR_WREGISTERS macro_for_register, macro_for_reserved_register
    FOR_REGISTERS \macro_for_register, \macro_for_reserved_register, /* xreg */ 0
.endm

.macro BRK0_BRK0
    brk 0
    brk 0
.endm
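// Each switch case in the tables below occupies exactly 8 bytes (two 4-byte
// instructions), one case per register encoding 0-31, so a full table is
// 32 x 8 = 256 bytes. Reserved encodings (IP0, IP1, LR, SP) are filled with
// two BRK #0 placeholders to keep that layout intact.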

#if BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
.macro INTROSPECTION_ARRAY_LOAD index_reg
    ldr   wIP0, [xIP0, \index_reg, lsl #2]
    b     art_quick_read_barrier_mark_introspection
.endm
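// Used for the array switch cases below. The array thunk has already moved
// the base register into xIP0, so each case loads the 32-bit compressed
// reference at [base + index * 4] into wIP0 and branches to the main
// entrypoint, which marks it and returns through the return switch.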

.macro MOV_WIP0_TO_WREG_AND_BL_LR reg
    mov   \reg, wIP0
    br    lr  // Do not use RET as we do not enter the entrypoint with "BL".
.endm

.macro READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH ldr_offset
    /*
     * Allocate 44 stack slots * 8 = 352 bytes:
     * - 19 slots for core registers X0-X15, X18-X19, LR
     * - 1 slot padding
     * - 24 slots for floating-point registers D0-D7 and D16-D31
     */
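    /*
     * Resulting stack layout, as a summary of the stores below:
     *   [sp, #0]    x0-x15   (16 slots, offsets 0-120)
     *   [sp, #128]  x18, x19
     *   [sp, #144]  lr
     *   [sp, #152]  (padding)
     *   [sp, #160]  d0-d7    (offsets 160-216)
     *   [sp, #224]  d16-d31  (offsets 224-344)
     */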
    // Save all potentially live caller-save core registers.
    SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 352
    SAVE_TWO_REGS  x2,  x3, 16
    SAVE_TWO_REGS  x4,  x5, 32
    SAVE_TWO_REGS  x6,  x7, 48
    SAVE_TWO_REGS  x8,  x9, 64
    SAVE_TWO_REGS x10, x11, 80
    SAVE_TWO_REGS x12, x13, 96
    SAVE_TWO_REGS x14, x15, 112
    SAVE_TWO_REGS x18, x19, 128       // Skip x16, x17, i.e. IP0, IP1.
    SAVE_REG      xLR,      144       // Save return address, skip padding at 152.
    // Save all potentially live caller-save floating-point registers.
    stp   d0, d1,   [sp, #160]
    stp   d2, d3,   [sp, #176]
    stp   d4, d5,   [sp, #192]
    stp   d6, d7,   [sp, #208]
    stp   d16, d17, [sp, #224]
    stp   d18, d19, [sp, #240]
    stp   d20, d21, [sp, #256]
    stp   d22, d23, [sp, #272]
    stp   d24, d25, [sp, #288]
    stp   d26, d27, [sp, #304]
    stp   d28, d29, [sp, #320]
    stp   d30, d31, [sp, #336]

    mov   x0, xIP0
    bl    artReadBarrierMark          // artReadBarrierMark(obj)
    mov   xIP0, x0

    // Restore core regs, except x0 and x1 as the return register switch case
    // address calculation is smoother with an extra register.
    RESTORE_TWO_REGS  x2,  x3, 16
    RESTORE_TWO_REGS  x4,  x5, 32
    RESTORE_TWO_REGS  x6,  x7, 48
    RESTORE_TWO_REGS  x8,  x9, 64
    RESTORE_TWO_REGS x10, x11, 80
    RESTORE_TWO_REGS x12, x13, 96
    RESTORE_TWO_REGS x14, x15, 112
    RESTORE_TWO_REGS x18, x19, 128    // Skip x16, x17, i.e. IP0, IP1.
    RESTORE_REG      xLR,      144    // Restore return address.
    // Restore caller-save floating-point registers.
    ldp   d0, d1,   [sp, #160]
    ldp   d2, d3,   [sp, #176]
    ldp   d4, d5,   [sp, #192]
    ldp   d6, d7,   [sp, #208]
    ldp   d16, d17, [sp, #224]
    ldp   d18, d19, [sp, #240]
    ldp   d20, d21, [sp, #256]
    ldp   d22, d23, [sp, #272]
    ldp   d24, d25, [sp, #288]
    ldp   d26, d27, [sp, #304]
    ldp   d28, d29, [sp, #320]
    ldp   d30, d31, [sp, #336]

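    // Dispatch back to the caller-specific return case: the low 5 bits of an
    // A64 LDR encoding hold the destination register Rt, and the `bfi` below
    // copies them into bits 7:3 of the 256-byte aligned switch base address,
    // i.e. it selects the 8-byte case at `switch_base + Rt * 8`.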
    ldr   x0, [lr, #\ldr_offset]      // Load the instruction.
    adr   xIP1, .Lmark_introspection_return_switch
    bfi   xIP1, x0, #3, #5            // Calculate switch case address.
    RESTORE_TWO_REGS_DECREASE_FRAME x0, x1, 352
    br    xIP1
.endm

    /*
     * Use introspection to load a reference from the same address as the LDR
     * instruction in generated code would load (unless loaded by the thunk,
     * see below), call ReadBarrier::Mark() with that reference if needed
     * and return it in the same register as the LDR instruction would load.
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads the LDR instruction in generated code follows
     * the branch to the thunk, i.e. the LDR is at [LR, #-4], and the thunk
     * knows the holder and performs the gray bit check, returning to the LDR
     * instruction if the object is not gray, so this entrypoint no longer
     * needs to know anything about the holder. For GC root loads, the LDR
     * instruction in generated code precedes the branch to the thunk (i.e.
     * the LDR is at [LR, #-8]) and the thunk does not do the gray bit check.
     *
     * For field accesses and array loads with a constant index the thunk loads
     * the reference into IP0 using introspection and calls the main entrypoint,
     * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
     * the passed reference is poisoned.
     *
     * For array accesses with a non-constant index, the thunk inserts bits
     * 16-21 of the LDR instruction into the entrypoint address, effectively
     * calculating a switch case label based on the index register (bits 16-20)
     * and adding an extra offset (bit 21 is set) to differentiate from the
     * main entrypoint, then moves the base register to IP0 and jumps to the
     * switch case. Therefore we need to align the main entrypoint to 512 bytes,
     * accounting for a 256-byte offset followed by 32 array entrypoints
     * starting at art_quick_read_barrier_mark_introspection_arrays, each
     * containing an LDR (register) and a branch to the main entrypoint.
     *
     * For GC root accesses we cannot use the main entrypoint because of the
     * different offset where the LDR instruction in generated code is located.
     * (And even with heap poisoning enabled, GC roots are not poisoned.)
     * To re-use the same entrypoint pointer in generated code, we make sure
     * that the gc root entrypoint (a copy of the entrypoint with a different
     * offset for introspection loads) is located at a known offset (768 bytes,
     * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
     * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
     * the root register to IP0 and jumps to the customized entrypoint,
     * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
     * performs all the fast-path checks, so we need just the slow path.
     *
     * The code structure is
     *   art_quick_read_barrier_mark_introspection:
     *     Up to 256 bytes for the main entrypoint code.
     *     Padding to 256 bytes if needed.
     *   art_quick_read_barrier_mark_introspection_arrays:
     *     Exactly 256 bytes for array load switch cases (32x2 instructions).
     *   .Lmark_introspection_return_switch:
     *     Exactly 256 bytes for return switch cases (32x2 instructions).
     *   art_quick_read_barrier_mark_introspection_gc_roots:
     *     GC root entrypoint code.
     */
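    /*
     * With this layout, the offsets from the main entrypoint are: array
     * switch cases at +256, return switch cases at +512 and the GC root
     * entrypoint at +768 (BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET).
     */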
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP0 contains the reference, IP1 can be freely used.
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF wIP0
    // If the reference is null, just return it in the right register.
    cbz   wIP0, .Lmark_introspection_return
    // Use wIP1 as a temp and check the mark bit of the reference.
    ldr   wIP1, [xIP0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tbz   wIP1, #LOCK_WORD_MARK_BIT_SHIFT, .Lmark_introspection_unmarked
.Lmark_introspection_return:
    // Without an extra register for the return switch case address calculation,
    // we exploit the high word of xIP0 to temporarily store ref_reg * 8,
    // so the return switch below must move wIP0 instead of xIP0 to the register.
    ldr   wIP1, [lr, #BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET]  // Load the instruction.
    bfi   xIP0, xIP1, #(32 + 3), #5   // Extract ref_reg*8 to high word in xIP0.
    adr   xIP1, .Lmark_introspection_return_switch
    bfxil xIP1, xIP0, #32, #8         // Calculate return switch case address.
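    // In detail: the `bfi` above parks Rt (bits 4:0 of the loaded LDR
    // encoding) times 8 in bits 39:32 of xIP0; the reference in wIP0 stays
    // untouched and the rest of the high word is zero since references are
    // held zero-extended. `bfxil` then copies that byte into bits 7:0 of the
    // 256-byte aligned switch base, selecting the 8-byte case for Rt.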
    br    xIP1
.Lmark_introspection_unmarked:
    // Check if the top two bits are one; if so, this is a forwarding address.
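    // `wIP1 AND (wIP1 LSL 1)` has bit 31 set iff bits 31 and 30 of wIP1 are
    // both set, so the TST below sets the N flag exactly for the forwarding
    // address state and BMI takes the branch.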
    tst   wIP1, wIP1, lsl #1
    bmi   .Lmark_introspection_forwarding_address
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_FIELD_LDR_OFFSET

.Lmark_introspection_forwarding_address:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl   wIP0, wIP1, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b     .Lmark_introspection_return

    // We're very close to the allotted 256B for the entrypoint code before the
    // array switch cases. Should we go a little bit over the limit, we can
    // move some code after the array switch cases and return switch cases.
    .balign 256
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    FOR_XREGISTERS INTROSPECTION_ARRAY_LOAD, BRK0_BRK0
.Lmark_introspection_return_switch:
    FOR_WREGISTERS MOV_WIP0_TO_WREG_AND_BL_LR, BRK0_BRK0
    .hidden art_quick_read_barrier_mark_introspection_gc_roots
    .global art_quick_read_barrier_mark_introspection_gc_roots
art_quick_read_barrier_mark_introspection_gc_roots:
    READ_BARRIER_MARK_INTROSPECTION_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_OFFSET
END art_quick_read_barrier_mark_introspection

.extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME                // Save callee saves in case allocation triggers GC.
    mov     x2, xSELF
    mov     x3, sp
    INCREASE_FRAME 16                             // Reserve space for JValue result.
    str     xzr, [sp, #0]                         // Initialize result to zero.
    mov     x0, sp                                // Set x0 to point to the result.
    bl      artInvokePolymorphic                  // artInvokePolymorphic(result, receiver, thread, save_area)
    uxtb    w0, w0                                // Result is the return type descriptor as a char.
    sub     w0, w0, 'A'                           // Convert to a zero-based index.
    cmp     w0, 'Z' - 'A'
    bhi     .Lcleanup_and_return                  // Clean up and return if out of bounds.
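    // Each handler table entry is the signed distance from .Lstart_of_handlers
    // in 4-byte instruction words (see HANDLER_TABLE_OFFSET below), so the
    // `sxtb #2` operand sign-extends the loaded byte and scales it back to a
    // byte offset before adding it to the handlers' base address.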
    adrp    x1, .Lhandler_table                   // Compute the address of the handler table.
    add     x1, x1, :lo12:.Lhandler_table
    ldrb    w0, [x1, w0, uxtw]                    // Look up the handler offset in the handler table.
    adr     x1, .Lstart_of_handlers
    add     x0, x1, w0, sxtb #2                   // Convert the relative offset to an absolute address.
    br      x0                                    // Branch to the handler.

.Lstart_of_handlers:
.Lstore_boolean_result:
    ldrb    w0, [sp]
    b       .Lcleanup_and_return
.Lstore_char_result:
    ldrh    w0, [sp]
    b       .Lcleanup_and_return
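    // The two floating-point handlers below also copy the result into
    // [sp, #32]. Assuming the SaveRefsAndArgs frame stores d0 at offset 16
    // (so [sp, #32] is that slot while the 16-byte JValue block is still on
    // top), RESTORE_SAVE_REFS_AND_ARGS_FRAME then reloads the value into d0.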
.Lstore_float_result:
    ldr     s0, [sp]
    str     s0, [sp, #32]
    b       .Lcleanup_and_return
.Lstore_double_result:
    ldr     d0, [sp]
    str     d0, [sp, #32]
    b       .Lcleanup_and_return
.Lstore_long_result:
    ldr     x0, [sp]
    // Fall through.
.Lcleanup_and_return:
    DECREASE_FRAME 16
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    RETURN_OR_DELIVER_PENDING_EXCEPTION_X1

    .section    .rodata                           // Place the handler table in a read-only section away from text.
    .align  2
.macro HANDLER_TABLE_OFFSET handler_label
    .byte (\handler_label - .Lstart_of_handlers) / 4
.endm
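// The table below is indexed by (return type descriptor - 'A'). Since each
// entry is a single signed byte counting 4-byte words, every handler must lie
// within the reach of a signed byte of words (about +/- 512 bytes) of
// .Lstart_of_handlers.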
.Lhandler_table:
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // A
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)     // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)   // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)    // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // H
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // K
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // L (object; references are compressed and only 32 bits)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // R
    HANDLER_TABLE_OFFSET(.Lstore_long_result)     // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)    // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result)  // Z (boolean)
    .text

END art_quick_invoke_polymorphic