/*
 * Copyright (C) 2012 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_arm.S"
#include "interpreter/cfi_asm_support.h"

#include "arch/quick_alloc_entrypoints.S"

    /* Deliver the given exception */
    .extern artDeliverExceptionFromCode
    /* Deliver an exception pending on a thread */
    /* NOTE(review): the code below calls artDeliverPendingExceptionFromCode; confirm this
     * .extern name matches the C++ symbol actually referenced. */
    .extern artDeliverPendingException

    /* Throughout this file r9 (rSELF) holds Thread::Current(); see the THREAD_* offset loads. */

    /*
     * Macro to spill the GPRs.
     */
.macro SPILL_ALL_CALLEE_SAVE_GPRS
    push {r4-r11, lr}                             @ 9 words (36 bytes) of callee saves.
    .cfi_adjust_cfa_offset 36
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r5, 4
    .cfi_rel_offset r6, 8
    .cfi_rel_offset r7, 12
    .cfi_rel_offset r8, 16
    .cfi_rel_offset r9, 20
    .cfi_rel_offset r10, 24
    .cfi_rel_offset r11, 28
    .cfi_rel_offset lr, 32
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveAllCalleeSaves)
     */
.macro SETUP_SAVE_ALL_CALLEE_SAVES_FRAME rTemp
    SPILL_ALL_CALLEE_SAVE_GPRS                    @ 9 words (36 bytes) of callee saves.
    vpush {s16-s31}                               @ 16 words (64 bytes) of floats.
    .cfi_adjust_cfa_offset 64
    sub sp, #12                                   @ 3 words of space, bottom word will hold Method*
    .cfi_adjust_cfa_offset 12
    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveAllCalleeSaves Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_ALL_CALLEE_SAVES_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_ALL_CALLEE_SAVES != 36 + 64 + 12)
#error "FRAME_SIZE_SAVE_ALL_CALLEE_SAVES(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsOnly).
     */
.macro SETUP_SAVE_REFS_ONLY_FRAME rTemp
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r5-r8, r10-r11, lr}                     @ 7 words of callee saves
    .cfi_adjust_cfa_offset 28
    .cfi_rel_offset r5, 0
    .cfi_rel_offset r6, 4
    .cfi_rel_offset r7, 8
    .cfi_rel_offset r8, 12
    .cfi_rel_offset r10, 16
    .cfi_rel_offset r11, 20
    .cfi_rel_offset lr, 24
    sub sp, #4                                    @ bottom word will hold Method*
    .cfi_adjust_cfa_offset 4
    RUNTIME_CURRENT2 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsOnly Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_ONLY_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_ONLY != 28 + 4)
#error "FRAME_SIZE_SAVE_REFS_ONLY(ARM) size not as expected."
#endif
.endm

.macro RESTORE_SAVE_REFS_ONLY_FRAME
    add sp, #4                                    @ bottom word holds Method*
    .cfi_adjust_cfa_offset -4
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r5-r8, r10-r11, lr}                      @ 7 words of callee saves
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -28
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveRefsAndArgs).
     */
.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    // Note: We could avoid saving R8 in the case of Baker read
    // barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    push {r1-r3, r5-r8, r10-r11, lr}              @ 10 words of callee saves and args.
    .cfi_adjust_cfa_offset 40
    .cfi_rel_offset r1, 0
    .cfi_rel_offset r2, 4
    .cfi_rel_offset r3, 8
    .cfi_rel_offset r5, 12
    .cfi_rel_offset r6, 16
    .cfi_rel_offset r7, 20
    .cfi_rel_offset r8, 24
    .cfi_rel_offset r10, 28
    .cfi_rel_offset r11, 32
    .cfi_rel_offset lr, 36
    vpush {s0-s15}                                @ 16 words of float args.
    .cfi_adjust_cfa_offset 64
    sub sp, #8                                    @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_REFS_AND_ARGS != 40 + 64 + 8)
#error "FRAME_SIZE_SAVE_REFS_AND_ARGS(ARM) size not as expected."
#endif
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME rTemp
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    RUNTIME_CURRENT3 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveRefsAndArgs Method* into rTemp.
    ldr \rTemp, [\rTemp, #RUNTIME_SAVE_REFS_AND_ARGS_METHOD_OFFSET]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    SETUP_SAVE_REFS_AND_ARGS_FRAME_REGISTERS_ONLY
    str r0, [sp, #0]                              @ Store ArtMethod* to bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.
.endm

.macro RESTORE_SAVE_REFS_AND_ARGS_FRAME
    add sp, #8                                    @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {s0-s15}
    .cfi_adjust_cfa_offset -64
    // Note: Likewise, we could avoid restoring R8 in the case of Baker
    // read barriers, as it is overwritten by REFRESH_MARKING_REGISTER
    // later; but it's not worth handling this special case.
    pop {r1-r3, r5-r8, r10-r11, lr}               @ 10 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore lr
    .cfi_adjust_cfa_offset -40
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     * when core registers are already saved.
     */
.macro SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
                                                  @ 14 words of callee saves and args already saved.
    vpush {d0-d15}                                @ 32 words, 2 for each of the 16 saved doubles.
    .cfi_adjust_cfa_offset 128
    sub sp, #8                                    @ 2 words of space, alignment padding and Method*
    .cfi_adjust_cfa_offset 8
    RUNTIME_CURRENT1 \rTemp                       @ Load Runtime::Current into rTemp.
    @ Load kSaveEverything Method* into rTemp.
    ldr \rTemp, [\rTemp, #\runtime_method_offset]
    str \rTemp, [sp, #0]                          @ Place Method* at bottom of stack.
    str sp, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]  @ Place sp in Thread::Current()->top_quick_frame.

    // Ugly compile-time check, but we only have the preprocessor.
#if (FRAME_SIZE_SAVE_EVERYTHING != 56 + 128 + 8)
#error "FRAME_SIZE_SAVE_EVERYTHING(ARM) size not as expected."
#endif
.endm

    /*
     * Macro that sets up the callee save frame to conform with
     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
     */
.macro SETUP_SAVE_EVERYTHING_FRAME rTemp, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    push {r0-r12, lr}                             @ 14 words of callee saves and args.
    .cfi_adjust_cfa_offset 56
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48
    .cfi_rel_offset lr, 52
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED \rTemp, \runtime_method_offset
.endm

.macro RESTORE_SAVE_EVERYTHING_FRAME
    add sp, #8                                    @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    pop {r0-r12, lr}                              @ 14 words of callee saves
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -56
.endm

    /* Like RESTORE_SAVE_EVERYTHING_FRAME, but leaves the return value in r0 untouched. */
.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    add sp, #8                                    @ rewind sp
    .cfi_adjust_cfa_offset -8
    vpop {d0-d15}
    .cfi_adjust_cfa_offset -128
    add sp, #4                                    @ skip r0
    .cfi_adjust_cfa_offset -4
    .cfi_restore r0                               @ debugger can no longer restore caller's r0
    pop {r1-r12, lr}                              @ 13 words of callee saves
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r5
    .cfi_restore r6
    .cfi_restore r7
    .cfi_restore r8
    .cfi_restore r9
    .cfi_restore r10
    .cfi_restore r11
    .cfi_restore r12
    .cfi_restore lr
    .cfi_adjust_cfa_offset -52
.endm

// Macro to refresh the Marking Register (R8).
//
// This macro must be called at the end of functions implementing
// entrypoints that possibly (directly or indirectly) perform a
// suspend check (before they return).
.macro REFRESH_MARKING_REGISTER
#if defined(USE_READ_BARRIER) && defined(USE_BAKER_READ_BARRIER)
    ldr rMR, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
#endif
.endm

.macro RETURN_IF_RESULT_IS_ZERO
    cbnz r0, 1f                                   @ result non-zero branch over
    bx lr                                         @ return
1:
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO
    cbz r0, 1f                                    @ result zero branch over
    bx lr                                         @ return
1:
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
     */
.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
    mov r0, r9                                    @ pass Thread::Current
    bl artDeliverPendingExceptionFromCode         @ artDeliverPendingExceptionFromCode(Thread*)
.endm

    /*
     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
     * exception is Thread::Current()->exception_.
     */
.macro DELIVER_PENDING_EXCEPTION
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0          @ save callee saves for throw
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.endm

.macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0          @ save all registers as basis for long jump context
    mov r0, r9                                    @ pass Thread::Current
    bl \cxx_name                                  @ \cxx_name(Thread*)
END \c_name
.endm

.macro NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r0                @ save all registers as basis for long jump context
    mov r0, r9                                    @ pass Thread::Current
    bl \cxx_name                                  @ \cxx_name(Thread*)
END \c_name
.endm

.macro ONE_ARG_RUNTIME_EXCEPTION c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r1          @ save all registers as basis for long jump context
    mov r1, r9                                    @ pass Thread::Current
    bl \cxx_name                                  @ \cxx_name(Thread*)
END \c_name
.endm

.macro TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING c_name, cxx_name
    .extern \cxx_name
ENTRY \c_name
    SETUP_SAVE_EVERYTHING_FRAME r2                @ save all registers as basis for long jump context
    mov r2, r9                                    @ pass Thread::Current
    bl \cxx_name                                  @ \cxx_name(Thread*)
END \c_name
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
    ldr \reg, [r9, #THREAD_EXCEPTION_OFFSET]      // Get exception field.
    cbnz \reg, 1f
    bx lr
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r1
.endm

.macro RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

.macro RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
    RETURN_IF_RESULT_IS_NON_ZERO
    DELIVER_PENDING_EXCEPTION
.endm

// Macros taking opportunity of code similarities for downcalls.
.macro ONE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1                 @ save callee saves in case of GC
    mov r1, r9                                    @ pass Thread::Current
    bl \entrypoint                                @ (uint32_t field_idx, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro TWO_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2                 @ save callee saves in case of GC
    mov r2, r9                                    @ pass Thread::Current
    bl \entrypoint                                @ (field_idx, Object*, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

.macro THREE_ARG_REF_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3                 @ save callee saves in case of GC
    mov r3, r9                                    @ pass Thread::Current
    bl \entrypoint                                @ (field_idx, Object*, new_val, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME                  @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

    /*
     * Called by managed code, saves callee saves and then calls artThrowException
     * that will place a mock Method* at the bottom of the stack. Arg1 holds the exception.
     */
ONE_ARG_RUNTIME_EXCEPTION art_quick_deliver_exception, artDeliverExceptionFromCode

    /*
     * Called by managed code to create and deliver a NullPointerException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_null_pointer_exception, artThrowNullPointerExceptionFromCode

    /*
     * Call installed by a signal handler to create and deliver a NullPointerException.
     */
    .extern art_quick_throw_null_pointer_exception_from_signal
ENTRY art_quick_throw_null_pointer_exception_from_signal
    // The fault handler pushes the gc map address, i.e. "return address", to stack
    // and passes the fault address in LR. So we need to set up the CFI info accordingly.
    .cfi_def_cfa_offset __SIZEOF_POINTER__
    .cfi_rel_offset lr, 0
    push {r0-r12}                                 @ 13 words of callee saves and args; LR already saved.
    .cfi_adjust_cfa_offset 52
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r5, 20
    .cfi_rel_offset r6, 24
    .cfi_rel_offset r7, 28
    .cfi_rel_offset r8, 32
    .cfi_rel_offset r9, 36
    .cfi_rel_offset r10, 40
    .cfi_rel_offset r11, 44
    .cfi_rel_offset ip, 48

    @ save all registers as basis for long jump context
    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1
    mov r0, lr                                    @ pass the fault address stored in LR by the fault handler.
    mov r1, r9                                    @ pass Thread::Current
    bl artThrowNullPointerExceptionFromSignal     @ (fault_address, Thread*)
END art_quick_throw_null_pointer_exception_from_signal

    /*
     * Called by managed code to create and deliver an ArithmeticException.
     */
NO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_div_zero, artThrowDivZeroFromCode

    /*
     * Called by managed code to create and deliver an ArrayIndexOutOfBoundsException. Arg1 holds
     * index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_array_bounds, artThrowArrayBoundsFromCode

    /*
     * Called by managed code to create and deliver a StringIndexOutOfBoundsException
     * as if thrown from a call to String.charAt(). Arg1 holds index, arg2 holds limit.
     */
TWO_ARG_RUNTIME_EXCEPTION_SAVE_EVERYTHING art_quick_throw_string_bounds, artThrowStringBoundsFromCode

    /*
     * Called by managed code to create and deliver a StackOverflowError.
     */
NO_ARG_RUNTIME_EXCEPTION art_quick_throw_stack_overflow, artThrowStackOverflowFromCode

    /*
     * All generated callsites for interface invokes and invocation slow paths will load arguments
     * as usual - except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
     * the method_idx. This wrapper will save arg1-arg3, and call the appropriate C helper.
     * NOTE: "this" is first visible argument of the target, and so can be found in arg1/r1.
     *
     * The helper will attempt to locate the target and return a 64-bit result in r0/r1 consisting
     * of the target Method* in r0 and method->code_ in r1.
     *
     * If unsuccessful, the helper will return null/null. There will be a pending exception in the
     * thread and we branch to another stub to deliver it.
     *
     * On success this wrapper will restore arguments and *jump* to the target, leaving the lr
     * pointing back to the original caller.
     *
     * Clobbers IP (R12).
     */
.macro INVOKE_TRAMPOLINE_BODY cxx_name
    .extern \cxx_name
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2             @ save callee saves in case allocation triggers GC
    mov r2, r9                                    @ pass Thread::Current
    mov r3, sp
    bl \cxx_name                                  @ (method_idx, this, Thread*, SP)
    mov r12, r1                                   @ save Method*->code_
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    cbz r0, 1f                                    @ did we find the target? if not go to exception delivery
    bx r12                                        @ tail call to target
1:
    DELIVER_PENDING_EXCEPTION
.endm

.macro INVOKE_TRAMPOLINE c_name, cxx_name
ENTRY \c_name
    INVOKE_TRAMPOLINE_BODY \cxx_name
END \c_name
.endm

INVOKE_TRAMPOLINE art_quick_invoke_interface_trampoline_with_access_check, artInvokeInterfaceTrampolineWithAccessCheck

INVOKE_TRAMPOLINE art_quick_invoke_static_trampoline_with_access_check, artInvokeStaticTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_direct_trampoline_with_access_check, artInvokeDirectTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_super_trampoline_with_access_check, artInvokeSuperTrampolineWithAccessCheck
INVOKE_TRAMPOLINE art_quick_invoke_virtual_trampoline_with_access_check, artInvokeVirtualTrampolineWithAccessCheck

    /*
     * Quick invocation stub internal.
     * On entry:
     *   r0 = method pointer
     *   r1 = argument array or null for no argument methods
     *   r2 = size of argument array in bytes
     *   r3 = (managed) thread pointer
     *   [sp] = JValue* result
     *   [sp + 4] = result_in_float
     *   [sp + 8] = core register argument array
     *   [sp + 12] = fp register argument array
     *  +-------------------------+
     *  | uint32_t* fp_reg_args   |
     *  | uint32_t* core_reg_args |
     *  |   result_in_float       | <- Caller frame
     *  |   Jvalue* result        |
     *  +-------------------------+
     *  |          lr             |
     *  |          r11            |
     *  |          r9             |
     *  |          r4             | <- r11
     *  +-------------------------+
     *  | uint32_t out[n-1]       |
     *  |    :      :             |        Outs
     *  | uint32_t out[0]         |
     *  | StackRef<ArtMethod>     | <- SP  value=null
     *  +-------------------------+
     */
ENTRY art_quick_invoke_stub_internal
    SPILL_ALL_CALLEE_SAVE_GPRS                    @ spill regs (9)
    mov    r11, sp                                @ save the stack pointer
    .cfi_def_cfa_register r11

    mov    r9, r3                                 @ move managed thread pointer into r9

    add    r4, r2, #4                             @ create space for method pointer in frame
    sub    r4, sp, r4                             @ reserve & align *stack* to 16 bytes: native calling
    and    r4, #0xFFFFFFF0                        @ convention only aligns to 8B, so we have to ensure ART
    mov    sp, r4                                 @ 16B alignment ourselves.

    mov    r4, r0                                 @ save method*
    add    r0, sp, #4                             @ pass stack pointer + method ptr as dest for memcpy
    bl     memcpy                                 @ memcpy (dest, src, bytes)
    mov    ip, #0                                 @ set ip to 0
    str    ip, [sp]                               @ store null for method* at bottom of frame

    ldr    ip, [r11, #48]                         @ load fp register argument array pointer
    vldm   ip, {s0-s15}                           @ copy s0 - s15

    ldr    ip, [r11, #44]                         @ load core register argument array pointer
    mov    r0, r4                                 @ restore method*
    add    ip, ip, #4                             @ skip r0
    ldm    ip, {r1-r3}                            @ copy r1 - r3

    REFRESH_MARKING_REGISTER

    ldr    ip, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]  @ get pointer to the code
    blx    ip                                     @ call the method

    mov    sp, r11                                @ restore the stack pointer
    .cfi_def_cfa_register sp

    ldr    r4, [sp, #40]                          @ load result_is_float
    ldr    r9, [sp, #36]                          @ load the result pointer
    cmp    r4, #0
    ite    eq
    strdeq r0, [r9]                               @ store r0/r1 into result pointer
    vstrne d0, [r9]                               @ store s0-s1/d0 into result pointer

    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc} @ restore spill regs
END art_quick_invoke_stub_internal

    /*
     * On stack replacement stub.
     * On entry:
     *   r0 = stack to copy
     *   r1 = size of stack
     *   r2 = pc to call
     *   r3 = JValue* result
     *   [sp] = shorty
     *   [sp + 4] = thread
     */
ENTRY art_quick_osr_stub
    SPILL_ALL_CALLEE_SAVE_GPRS                    @ Spill regs (9)
    SAVE_SIZE=9*4
    mov    r11, sp                                @ Save the stack pointer
    .cfi_def_cfa r11, SAVE_SIZE                   @ CFA = r11 + SAVE_SIZE
    .cfi_remember_state
    mov    r10, r1                                @ Save size of stack
    ldr    r9, [r11, #40]                         @ Move managed thread pointer into r9
    REFRESH_MARKING_REGISTER
    mov    r6, r2                                 @ Save the pc to call
    sub    r7, sp, #12                            @ Reserve space for stack pointer,
                                                  @    JValue* result, and ArtMethod* slot.
    and    r7, #0xFFFFFFF0                        @ Align stack pointer
    mov    sp, r7                                 @ Update stack pointer
    str    r11, [sp, #4]                          @ Save old stack pointer
    str    r3, [sp, #8]                           @ Save JValue* result
    mov    ip, #0
    str    ip, [sp]                               @ Store null for ArtMethod* at bottom of frame
    // r11 isn't properly spilled in the osr method, so we need use DWARF expression.
    // NB: the CFI must be before the call since this is the address gdb will lookup.
    // NB: gdb expects that cfa_expression returns the CFA value (not address to it).
    .cfi_escape                                   /* CFA = [sp + 4] + SAVE_SIZE */ \
      0x0f, 6,                                    /* DW_CFA_def_cfa_expression(len) */ \
      0x92, 13, 4,                                /* DW_OP_bregx(reg,offset) */ \
      0x06,                                       /* DW_OP_deref */ \
      0x23, SAVE_SIZE                             /* DW_OP_plus_uconst(val) */
    bl .Losr_entry                                @ Call the method
    ldr    r10, [sp, #8]                          @ Restore JValue* result
    ldr    sp, [sp, #4]                           @ Restore saved stack pointer
    .cfi_def_cfa sp, SAVE_SIZE                    @ CFA = sp + SAVE_SIZE
    ldr    r4, [sp, #36]                          @ load shorty
    ldrb   r4, [r4, #0]                           @ load return type
    cmp    r4, #68                                @ Test if result type char == 'D'.
    beq    .Losr_fp_result
    cmp    r4, #70                                @ Test if result type char == 'F'.
    beq    .Losr_fp_result
    strd   r0, [r10]                              @ Store r0/r1 into result pointer
    b      .Losr_exit
.Losr_fp_result:
    vstr   d0, [r10]                              @ Store s0-s1/d0 into result pointer
.Losr_exit:
    pop    {r4, r5, r6, r7, r8, r9, r10, r11, pc}
.Losr_entry:
    .cfi_restore_state
    .cfi_def_cfa r11, SAVE_SIZE                   @ CFA = r11 + SAVE_SIZE
    sub sp, sp, r10                               @ Reserve space for callee stack
    sub r10, r10, #4
    str lr, [sp, r10]                             @ Store link register per the compiler ABI
    mov r2, r10
    mov r1, r0
    mov r0, sp
    bl  memcpy                                    @ memcpy (dest r0, src r1, bytes r2)
    bx r6
END art_quick_osr_stub

    /*
     * On entry r0 is uint32_t* gprs_ and r1 is uint32_t* fprs_
     */
ARM_ENTRY art_quick_do_long_jump
    vldm r1, {s0-s31}                             @ load all fprs from argument fprs_
    ldr  r2, [r0, #60]                            @ r2 = r15 (PC from gprs_ 60=4*15)
    ldr  r14, [r0, #56]                           @ (LR from gprs_ 56=4*14)
    add  r0, r0, #12                              @ increment r0 to skip gprs_[0..2] 12=4*3
    ldm  r0, {r3-r13}                             @ load remaining gprs from argument gprs_
    REFRESH_MARKING_REGISTER
    ldr  r0, [r0, #-12]                           @ load r0 value
    mov  r1, #0                                   @ clear result register r1
    bx   r2                                       @ do long jump
END art_quick_do_long_jump

    /*
     * Entry from managed code that calls artHandleFillArrayDataFromCode and delivers exception on
     * failure.
     */
TWO_ARG_REF_DOWNCALL art_quick_handle_fill_data, artHandleFillArrayDataFromCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

    /*
     * Entry from managed code that calls artLockObjectFromCode, may block for GC. r0 holds the
     * possibly null object to lock.
682 */ 683 .extern artLockObjectFromCode 684 ENTRY art_quick_lock_object 685 cbz r0, .Lslow_lock 686 .Lretry_lock: 687 ldr r2, [r9, #THREAD_ID_OFFSET] 688 ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 689 mov r3, r1 690 and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits 691 cbnz r3, .Lnot_unlocked @ already thin locked 692 @ unlocked case - r1: original lock word that's zero except for the read barrier bits. 693 orr r2, r1, r2 @ r2 holds thread id with count of 0 with preserved read barrier bits 694 strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 695 cbnz r3, .Llock_strex_fail @ store failed, retry 696 dmb ish @ full (LoadLoad|LoadStore) memory barrier 697 bx lr 698 .Lnot_unlocked: @ r1: original lock word, r2: thread_id with count of 0 and zero read barrier bits 699 lsr r3, r1, LOCK_WORD_STATE_SHIFT 700 cbnz r3, .Lslow_lock @ if either of the top two bits are set, go slow path 701 eor r2, r1, r2 @ lock_word.ThreadId() ^ self->ThreadId() 702 uxth r2, r2 @ zero top 16 bits 703 cbnz r2, .Lslow_lock @ lock word and self thread id's match -> recursive lock 704 @ else contention, go to slow path 705 mov r3, r1 @ copy the lock word to check count overflow. 706 and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits. 707 add r2, r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count in lock word placing in r2 to check overflow 708 lsr r3, r2, #LOCK_WORD_GC_STATE_SHIFT @ if the first gc state bit is set, we overflowed. 
709 cbnz r3, .Lslow_lock @ if we overflow the count go slow path 710 add r2, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ increment count for real 711 strex r3, r2, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits 712 cbnz r3, .Llock_strex_fail @ strex failed, retry 713 bx lr 714 .Llock_strex_fail: 715 b .Lretry_lock @ retry 716 .Lslow_lock: 717 SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves in case we block 718 mov r1, r9 @ pass Thread::Current 719 bl artLockObjectFromCode @ (Object* obj, Thread*) 720 RESTORE_SAVE_REFS_ONLY_FRAME 721 REFRESH_MARKING_REGISTER 722 RETURN_IF_RESULT_IS_ZERO 723 DELIVER_PENDING_EXCEPTION 724 END art_quick_lock_object 725 726 ENTRY art_quick_lock_object_no_inline 727 SETUP_SAVE_REFS_ONLY_FRAME r1 @ save callee saves in case we block 728 mov r1, r9 @ pass Thread::Current 729 bl artLockObjectFromCode @ (Object* obj, Thread*) 730 RESTORE_SAVE_REFS_ONLY_FRAME 731 REFRESH_MARKING_REGISTER 732 RETURN_IF_RESULT_IS_ZERO 733 DELIVER_PENDING_EXCEPTION 734 END art_quick_lock_object_no_inline 735 736 /* 737 * Entry from managed code that calls artUnlockObjectFromCode and delivers exception on failure. 738 * r0 holds the possibly null object to lock. 739 */ 740 .extern artUnlockObjectFromCode 741 ENTRY art_quick_unlock_object 742 cbz r0, .Lslow_unlock 743 .Lretry_unlock: 744 #ifndef USE_READ_BARRIER 745 ldr r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 746 #else 747 ldrex r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ Need to use atomic instructions for read barrier 748 #endif 749 lsr r2, r1, #LOCK_WORD_STATE_SHIFT 750 cbnz r2, .Lslow_unlock @ if either of the top two bits are set, go slow path 751 ldr r2, [r9, #THREAD_ID_OFFSET] 752 mov r3, r1 @ copy lock word to check thread id equality 753 and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits 754 eor r3, r3, r2 @ lock_word.ThreadId() ^ self->ThreadId() 755 uxth r3, r3 @ zero top 16 bits 756 cbnz r3, .Lslow_unlock @ do lock word and self thread id's match? 
757 mov r3, r1 @ copy lock word to detect transition to unlocked 758 and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED @ zero the gc bits 759 cmp r3, #LOCK_WORD_THIN_LOCK_COUNT_ONE 760 bpl .Lrecursive_thin_unlock 761 @ transition to unlocked 762 mov r3, r1 763 and r3, #LOCK_WORD_GC_STATE_MASK_SHIFTED @ r3: zero except for the preserved gc bits 764 dmb ish @ full (LoadStore|StoreStore) memory barrier 765 #ifndef USE_READ_BARRIER 766 str r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 767 #else 768 strex r2, r3, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits 769 cbnz r2, .Lunlock_strex_fail @ store failed, retry 770 #endif 771 bx lr 772 .Lrecursive_thin_unlock: @ r1: original lock word 773 sub r1, r1, #LOCK_WORD_THIN_LOCK_COUNT_ONE @ decrement count 774 #ifndef USE_READ_BARRIER 775 str r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] 776 #else 777 strex r2, r1, [r0, #MIRROR_OBJECT_LOCK_WORD_OFFSET] @ strex necessary for read barrier bits 778 cbnz r2, .Lunlock_strex_fail @ store failed, retry 779 #endif 780 bx lr 781 .Lunlock_strex_fail: 782 b .Lretry_unlock @ retry 783 .Lslow_unlock: 784 @ save callee saves in case exception allocation triggers GC 785 SETUP_SAVE_REFS_ONLY_FRAME r1 786 mov r1, r9 @ pass Thread::Current 787 bl artUnlockObjectFromCode @ (Object* obj, Thread*) 788 RESTORE_SAVE_REFS_ONLY_FRAME 789 REFRESH_MARKING_REGISTER 790 RETURN_IF_RESULT_IS_ZERO 791 DELIVER_PENDING_EXCEPTION 792 END art_quick_unlock_object 793 794 ENTRY art_quick_unlock_object_no_inline 795 @ save callee saves in case exception allocation triggers GC 796 SETUP_SAVE_REFS_ONLY_FRAME r1 797 mov r1, r9 @ pass Thread::Current 798 bl artUnlockObjectFromCode @ (Object* obj, Thread*) 799 RESTORE_SAVE_REFS_ONLY_FRAME 800 REFRESH_MARKING_REGISTER 801 RETURN_IF_RESULT_IS_ZERO 802 DELIVER_PENDING_EXCEPTION 803 END art_quick_unlock_object_no_inline 804 805 /* 806 * Entry from managed code that calls artInstanceOfFromCode and on failure calls 807 * 
artThrowClassCastExceptionForObject.
 */
    .extern artInstanceOfFromCode
    .extern artThrowClassCastExceptionForObject
ENTRY art_quick_check_instance_of
    @ Check-cast stub: r0 = object, r1 = destination class.
    @ Fast path calls artInstanceOfFromCode(obj, dest_class); non-zero result
    @ means the cast succeeds and we simply return. r2 is pushed only as
    @ padding to keep the stack 8-byte aligned at the call.
    push {r0-r2, lr}                    @ save arguments, padding (r2) and link register
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    bl artInstanceOfFromCode
    cbz r0, .Lthrow_class_cast_exception
    pop {r0-r2, pc}                     @ success: restore args and return to caller

.Lthrow_class_cast_exception:
    pop {r0-r2, lr}                     @ restore the original (Object*, Class*) for the throw
    .cfi_adjust_cfa_offset -16
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr

    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2  @ save all registers as basis for long jump context
    mov r2, r9                          @ pass Thread::Current
    bl artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
    bkpt                                @ unreached: the throw entrypoint does not return
END art_quick_check_instance_of

// Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
.macro POP_REG_NE rReg, offset, rExclude
    .ifnc \rReg, \rExclude
        ldr \rReg, [sp, #\offset]       @ restore rReg
        .cfi_restore \rReg
    .endif
.endm

// Save rReg's value to [sp, #offset] and emit the matching CFI record.
.macro PUSH_REG rReg, offset
    str \rReg, [sp, #\offset]           @ save rReg
    .cfi_rel_offset \rReg, \offset
.endm

/*
 * Macro to insert read barrier, only used in art_quick_aput_obj.
 * rObj and rDest are registers, offset is a defined literal such as MIRROR_OBJECT_CLASS_OFFSET.
 * With USE_READ_BARRIER it spills all caller-save GPRs around a call to
 * artReadBarrierSlow; without it, it is a plain (unpoisoned) reference load.
 * TODO: When read barrier has a fast path, add heap unpoisoning support for the fast path.
 */
.macro READ_BARRIER rDest, rObj, offset
#ifdef USE_READ_BARRIER
    push {r0-r3, ip, lr}                @ 6 words for saved registers (used in art_quick_aput_obj)
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset ip, 16
    .cfi_rel_offset lr, 20
    sub sp, #8                          @ push padding (keeps sp 8-byte aligned for the call)
    .cfi_adjust_cfa_offset 8
    @ mov r0, \rRef                     @ pass ref in r0 (no-op for now since parameter ref is unused)
    .ifnc \rObj, r1
        mov r1, \rObj                   @ pass rObj
    .endif
    mov r2, #\offset                    @ pass offset
    bl artReadBarrierSlow               @ artReadBarrierSlow(ref, rObj, offset)
    @ No need to unpoison return value in r0, artReadBarrierSlow() would do the unpoisoning.
    .ifnc \rDest, r0
        mov \rDest, r0                  @ save return value in rDest
    .endif
    add sp, #8                          @ pop padding
    .cfi_adjust_cfa_offset -8
    POP_REG_NE r0, 0, \rDest            @ conditionally restore saved registers,
    POP_REG_NE r1, 4, \rDest            @ skipping the one that now holds the result
    POP_REG_NE r2, 8, \rDest
    POP_REG_NE r3, 12, \rDest
    POP_REG_NE ip, 16, \rDest
    add sp, #20
    .cfi_adjust_cfa_offset -20
    pop {lr}                            @ restore lr
    .cfi_adjust_cfa_offset -4
    .cfi_restore lr
#else
    ldr \rDest, [\rObj, #\offset]
    UNPOISON_HEAP_REF \rDest
#endif  // USE_READ_BARRIER
.endm

#ifdef USE_READ_BARRIER
    .extern artReadBarrierSlow
#endif
    .hidden art_quick_aput_obj
ENTRY art_quick_aput_obj
    @ Object-array store: r0 = array, r1 = index, r2 = value (may be null).
    @ Performs the type check, the store, and the card-table mark.
#ifdef USE_READ_BARRIER
    @ The offset to .Ldo_aput_null is too large to use cbz due to expansion from READ_BARRIER macro.
    tst r2, r2
    beq .Ldo_aput_null
#else
    cbz r2, .Ldo_aput_null
#endif  // USE_READ_BARRIER
    READ_BARRIER r3, r0, MIRROR_OBJECT_CLASS_OFFSET         @ r3 = array's class
    READ_BARRIER ip, r2, MIRROR_OBJECT_CLASS_OFFSET         @ ip = value's class
    READ_BARRIER r3, r3, MIRROR_CLASS_COMPONENT_TYPE_OFFSET @ r3 = array's component type
    cmp r3, ip                          @ value's type == array's component type - trivial assignability
    bne .Lcheck_assignability
.Ldo_aput:
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]                   @ mark the card covering the stored-into array
    blx lr
.Ldo_aput_null:
    @ Storing null: no type check and no card mark needed.
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    str r2, [r3, r1, lsl #2]
    blx lr
.Lcheck_assignability:
    push {r0-r2, lr}                    @ save arguments
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset lr, 12
    mov r1, ip
    mov r0, r3
    bl artIsAssignableFromCode          @ (Class*, Class*): non-zero result means assignable
    cbz r0, .Lthrow_array_store_exception
    pop {r0-r2, lr}
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore lr
    .cfi_adjust_cfa_offset -16
    add r3, r0, #MIRROR_OBJECT_ARRAY_DATA_OFFSET
    POISON_HEAP_REF r2
    str r2, [r3, r1, lsl #2]
    ldr r3, [r9, #THREAD_CARD_TABLE_OFFSET]
    lsr r0, r0, #CARD_TABLE_CARD_SHIFT
    strb r3, [r3, r0]
    blx lr
.Lthrow_array_store_exception:
    pop {r0-r2, lr}
    /* No need to repeat restore cfi directives, the ones above apply here. */
    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r3
    mov r1, r2
    mov r2, r9                          @ pass Thread::Current
    bl artThrowArrayStoreException      @ (Class*, Class*, Thread*)
    bkpt                                @ unreached
END art_quick_aput_obj

// Macro to facilitate adding new allocation entrypoints.
// Macro to facilitate adding new allocation entrypoints.
// Calls \entrypoint(arg0, Thread*) under a kSaveRefsOnly frame; \return is a
// macro that inspects the result / pending exception and returns or delivers.
.macro ONE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r1       @ save callee saves in case of GC
    mov r1, r9                          @ pass Thread::Current
    bl \entrypoint                      @ (arg0 in r0, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
// As above, but with two managed arguments (r0, r1) and Thread* in r2.
.macro TWO_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
    mov r2, r9                          @ pass Thread::Current
    bl \entrypoint                      @ (arg0, arg1, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new array allocation entrypoints.
// Three managed arguments (r0-r2) and Thread* in r3.
.macro THREE_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r3       @ save callee saves in case of GC
    mov r3, r9                          @ pass Thread::Current
    @ (arg0, arg1, arg2, Thread*)
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro to facilitate adding new allocation entrypoints.
// Four managed arguments occupy r0-r3, so Thread* is passed on the stack;
// 16 bytes are reserved to keep sp 16-byte aligned across the call.
.macro FOUR_ARG_DOWNCALL name, entrypoint, return
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_REFS_ONLY_FRAME r12      @ save callee saves in case of GC
    str r9, [sp, #-16]!                 @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl \entrypoint
    add sp, #16                         @ strip the extra frame
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    \return
END \name
.endm

// Macro for string and type resolution and initialization.
// Macro for string and type resolution and initialization.
// Uses a kSaveEverything frame so that a GC triggered by the call can visit
// and update every register. On success the frame is unwound keeping the
// result in r0; a null result means an exception (e.g. OOME) is pending.
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL name, entrypoint, runtime_method_offset = RUNTIME_SAVE_EVERYTHING_METHOD_OFFSET
    .extern \entrypoint
ENTRY \name
    SETUP_SAVE_EVERYTHING_FRAME r1, \runtime_method_offset  @ save everything in case of GC
    mov r1, r9                          @ pass Thread::Current
    bl \entrypoint                      @ (uint32_t index, Thread*)
    cbz r0, 1f                          @ If result is null, deliver the OOME.
    .cfi_remember_state
    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
    REFRESH_MARKING_REGISTER
    bx lr
    .cfi_restore_state
1:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
END \name
.endm

// Variant used for class-initialization entrypoints: same shape, but tags the
// frame with the "for clinit" save-everything runtime method.
.macro ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT name, entrypoint
    ONE_ARG_SAVE_EVERYTHING_DOWNCALL \name, \entrypoint, RUNTIME_SAVE_EVERYTHING_FOR_CLINIT_METHOD_OFFSET
.endm

ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_static_storage, artInitializeStaticStorageFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL_FOR_CLINIT art_quick_initialize_type, artInitializeTypeFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_initialize_type_and_verify_access, artInitializeTypeAndVerifyAccessFromCode
ONE_ARG_SAVE_EVERYTHING_DOWNCALL art_quick_resolve_string, artResolveStringFromCode

// Note: Functions `art{Get,Set}<Kind>{Static,Instance}FromCompiledCode` are
// defined with a macro in runtime/entrypoints/quick/quick_field_entrypoints.cc.

/*
 * Called by managed code to resolve a static field and load a non-wide value.
 */
ONE_ARG_REF_DOWNCALL art_quick_get_byte_static, artGetByteStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_boolean_static, artGetBooleanStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_short_static, artGetShortStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_char_static, artGetCharStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get32_static, artGet32StaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
ONE_ARG_REF_DOWNCALL art_quick_get_obj_static, artGetObjStaticFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
/*
 * Called by managed code to resolve a static field and load a 64-bit primitive value.
 * r0 holds the field index; the value is returned in r0:r1, so success is
 * detected via the thread's pending-exception slot rather than the result.
 */
    .extern artGet64StaticFromCompiledCode
ENTRY art_quick_get64_static
    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
    mov r1, r9                          @ pass Thread::Current
    bl artGet64StaticFromCompiledCode   @ (uint32_t field_idx, Thread*)
    ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz r2, 1f                         @ r2 != 0: an exception is pending, deliver it
    bx lr                               @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_static

/*
 * Called by managed code to resolve an instance field and load a non-wide value.
 */
TWO_ARG_REF_DOWNCALL art_quick_get_byte_instance, artGetByteInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_boolean_instance, artGetBooleanInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_short_instance, artGetShortInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_char_instance, artGetCharInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get32_instance, artGet32InstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
TWO_ARG_REF_DOWNCALL art_quick_get_obj_instance, artGetObjInstanceFromCompiledCode, RETURN_OR_DELIVER_PENDING_EXCEPTION_R1
/*
 * Called by managed code to resolve an instance field and load a 64-bit primitive value.
 * r0 = field index, r1 = object; the value is returned in r0:r1, so success is
 * detected via the thread's pending-exception slot rather than the result.
 */
    .extern artGet64InstanceFromCompiledCode
ENTRY art_quick_get64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
    mov r2, r9                          @ pass Thread::Current
    bl artGet64InstanceFromCompiledCode @ (field_idx, Object*, Thread*)
    ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz r2, 1f                         @ r2 != 0: an exception is pending, deliver it
    bx lr                               @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_get64_instance

/*
 * Called by managed code to resolve a static field and store a value.
 */
TWO_ARG_REF_DOWNCALL art_quick_set8_static, artSet8StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set16_static, artSet16StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set32_static, artSet32StaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
TWO_ARG_REF_DOWNCALL art_quick_set_obj_static, artSetObjStaticFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

/*
 * Called by managed code to resolve an instance field and store a non-wide value.
 */
THREE_ARG_REF_DOWNCALL art_quick_set8_instance, artSet8InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set16_instance, artSet16InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set32_instance, artSet32InstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER
THREE_ARG_REF_DOWNCALL art_quick_set_obj_instance, artSetObjInstanceFromCompiledCode, RETURN_IF_RESULT_IS_ZERO_OR_DELIVER

/*
 * Called by managed code to resolve an instance field and store a wide value.
 * r0 = field index, r1 = object, r2:r3 = 64-bit value; Thread* goes on the
 * stack (all four argument registers are taken).
 */
    .extern artSet64InstanceFromCompiledCode
ENTRY art_quick_set64_instance
    SETUP_SAVE_REFS_ONLY_FRAME r12      @ save callee saves in case of GC
                                        @ r2:r3 contain the wide argument
    str r9, [sp, #-16]!                 @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl artSet64InstanceFromCompiledCode @ (field_idx, Object*, new_val, Thread*)
    add sp, #16                         @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME        @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_instance

/*
 * Called by managed code to resolve a static field and store a wide value.
 * r0 = field index, r2:r3 = 64-bit value; Thread* goes on the stack.
 */
    .extern artSet64StaticFromCompiledCode
ENTRY art_quick_set64_static
    SETUP_SAVE_REFS_ONLY_FRAME r12      @ save callee saves in case of GC
                                        @ r2:r3 contain the wide argument
    str r9, [sp, #-16]!                 @ expand the frame and pass Thread::Current
    .cfi_adjust_cfa_offset 16
    bl artSet64StaticFromCompiledCode   @ (field_idx, new_val, Thread*)
    add sp, #16                         @ release out args
    .cfi_adjust_cfa_offset -16
    RESTORE_SAVE_REFS_ONLY_FRAME        @ TODO: we can clearly save an add here
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_ZERO
    DELIVER_PENDING_EXCEPTION
END art_quick_set64_static

// Generate the allocation entrypoints for each allocator.
GENERATE_ALLOC_ENTRYPOINTS_FOR_NON_TLAB_ALLOCATORS
// Comment out allocators that have arm specific asm.
// Entrypoints below that are commented out have hand-written ARM fast paths
// later in this file; the rest are generated from the generic macros.
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_region_tlab, RegionTLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_region_tlab, RegionTLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_region_tlab, RegionTLAB)

// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_INITIALIZED(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_OBJECT_WITH_ACCESS_CHECK(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED8(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED16(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED32(_tlab, TLAB)
// GENERATE_ALLOC_ENTRYPOINTS_ALLOC_ARRAY_RESOLVED64(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_BYTES(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_CHARS(_tlab, TLAB)
GENERATE_ALLOC_ENTRYPOINTS_ALLOC_STRING_FROM_STRING(_tlab, TLAB)

// A hand-written override for GENERATE_ALLOC_ENTRYPOINTS_ALLOC_RESOLVED_OBJECT(_rosalloc, RosAlloc).
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ART_QUICK_ALLOC_OBJECT_ROSALLOC c_name, cxx_name, isInitialized
ENTRY \c_name
    // Fast path rosalloc allocation.
    // r0: type/return value, r9: Thread::Current
    // r1, r2, r3, r12: free.
    ldr r3, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]  // Check if the thread local
                                                        // allocation stack has room.
                                                        // TODO: consider using ldrd.
    ldr r12, [r9, #THREAD_LOCAL_ALLOC_STACK_END_OFFSET]
    cmp r3, r12
    bhs .Lslow_path\c_name

    ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3)
    cmp r3, #ROSALLOC_MAX_THREAD_LOCAL_BRACKET_SIZE     // Check if the size is for a thread
                                                        // local allocation. Also does the
                                                        // initialized and finalizable checks.
    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhs .Lslow_path\c_name
                                                        // Compute the rosalloc bracket index
                                                        // from the size. Since the size is
                                                        // already aligned we can combine the
                                                        // two shifts together.
    add r12, r9, r3, lsr #(ROSALLOC_BRACKET_QUANTUM_SIZE_SHIFT - POINTER_SIZE_SHIFT)
                                                        // Subtract pointer size since there
                                                        // are no runs for 0 byte allocations
                                                        // and the size is already aligned.
                                                        // Load the rosalloc run (r12)
    ldr r12, [r12, #(THREAD_ROSALLOC_RUNS_OFFSET - __SIZEOF_POINTER__)]
                                                        // Load the free list head (r3). This
                                                        // will be the return val.
    ldr r3, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
    cbz r3, .Lslow_path\c_name
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    ldr r1, [r3, #ROSALLOC_SLOT_NEXT_OFFSET]            // Load the next pointer of the head
                                                        // and update the list head with the
                                                        // next pointer.
    str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_HEAD_OFFSET)]
                                                        // Store the class pointer in the
                                                        // header. This also overwrites the
                                                        // next pointer. The offsets are
                                                        // asserted to match.
#if ROSALLOC_SLOT_NEXT_OFFSET != MIRROR_OBJECT_CLASS_OFFSET
#error "Class pointer needs to overwrite next pointer."
#endif
    POISON_HEAP_REF r0
    str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]
                                                        // Push the new object onto the thread
                                                        // local allocation stack and
                                                        // increment the thread local
                                                        // allocation stack top.
    ldr r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
    str r3, [r1], #COMPRESSED_REFERENCE_SIZE            // (Increment r1 as a side effect.)
    str r1, [r9, #THREAD_LOCAL_ALLOC_STACK_TOP_OFFSET]
                                                        // Decrement the size of the free list

    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the allocation stack slot. It can be a pointer to one of:
    // 0) Null entry, because the stack was bumped but the new pointer wasn't written yet.
    //    (The stack initial state is "null" pointers).
    // 1) A partially valid object, with an invalid class pointer to the next free rosalloc slot.
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // An object that is invalid only temporarily, and will eventually become valid.
    // The internal runtime code simply checks if the object is not null or is partial and then
    // ignores it.
    //
    // (Note: The actual check is done by seeing if a non-null object has a class pointer pointing
    // to ClassClass, and that the ClassClass's class pointer is self-cyclic. A rosalloc free slot
    // "next" pointer is not-cyclic.)
    //
    // See also b/28790624 for a listing of CLs dealing with this race.
    ldr r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]
    sub r1, #1
                                                        // TODO: consider combining this store
                                                        // and the list head store above using
                                                        // strd.
    str r1, [r12, #(ROSALLOC_RUN_FREE_LIST_OFFSET + ROSALLOC_RUN_FREE_LIST_SIZE_OFFSET)]

    mov r0, r3                                          // Set the return value and return.
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove this dmb for class initialization checks (b/36692143) by introducing
    // a new observably-initialized class state.
.endif
    bx lr

.Lslow_path\c_name:
    SETUP_SAVE_REFS_ONLY_FRAME r2       @ save callee saves in case of GC
    mov r1, r9                          @ pass Thread::Current
    bl \cxx_name                        @ (mirror::Class* cls, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \c_name
.endm

ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_resolved_rosalloc, artAllocObjectFromCodeResolvedRosAlloc, /* isInitialized */ 0
ART_QUICK_ALLOC_OBJECT_ROSALLOC art_quick_alloc_object_initialized_rosalloc, artAllocObjectFromCodeInitializedRosAlloc, /* isInitialized */ 1

// The common fast path code for art_quick_alloc_object_resolved/initialized_tlab
// and art_quick_alloc_object_resolved/initialized_region_tlab.
//
// r0: type r9: Thread::Current, r1, r2, r3, r12: free.
// Need to preserve r0 to the slow path.
//
// If isInitialized=1 then the compiler assumes the object's class has already been initialized.
// If isInitialized=0 the compiler can only assume it's been at least resolved.
.macro ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH slowPathLabel isInitialized
                                        // Load thread_local_pos (r12) and
                                        // thread_local_end (r3) with ldrd.
                                        // Check constraints for ldrd.
#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
#endif
    ldrd r12, r3, [r9, #THREAD_LOCAL_POS_OFFSET]
    sub r12, r3, r12                    // Compute the remaining buf size.
    ldr r3, [r0, #MIRROR_CLASS_OBJECT_SIZE_ALLOC_FAST_PATH_OFFSET]  // Load the object size (r3).
    cmp r3, r12                         // Check if it fits.
    // When isInitialized == 0, then the class is potentially not yet initialized.
    // If the class is not yet initialized, the object size will be very large to force the branch
    // below to be taken.
    //
    // See InitializeClassVisitors in class-inl.h for more details.
    bhi \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
                                        // Reload old thread_local_pos (into r2)
                                        // for the return value.
    ldr r2, [r9, #THREAD_LOCAL_POS_OFFSET]
    add r1, r2, r3
    str r1, [r9, #THREAD_LOCAL_POS_OFFSET]  // Store new thread_local_pos.
    // After this "STR" the object is published to the thread local allocation stack,
    // and it will be observable from a runtime internal (eg. Heap::VisitObjects) point of view.
    // It is not yet visible to the running (user) compiled code until after the return.
    //
    // To avoid the memory barrier prior to the "STR", a trick is employed, by differentiating
    // the state of the object. It can be either:
    // 1) A partially valid object, with a null class pointer
    //    (because the initial state of TLAB buffers is all 0s/nulls).
    // 2) A fully valid object, with a valid class pointer pointing to a real class.
    // Other states are not allowed.
    //
    // An object that is invalid only temporarily, and will eventually become valid.
    // The internal runtime code simply checks if the object is not null or is partial and then
    // ignores it.
    //
    // (Note: The actual check is done by checking that the object's class pointer is non-null.
    // Also, unlike rosalloc, the object can never be observed as null).
    ldr r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]  // Increment thread_local_objects.
    add r1, r1, #1
    str r1, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF r0
    str r0, [r2, #MIRROR_OBJECT_CLASS_OFFSET]   // Store the class pointer.
                                        // Fence below (when emitted) is "ish" not
                                        // "ishst" so that the code after this
                                        // allocation site will see the right values
                                        // in the fields of the class.
    mov r0, r2
.if \isInitialized == 0
    // This barrier is only necessary when the allocation also requires
    // a class initialization check.
    //
    // If the class is already observably initialized, then new-instance allocations are protected
    // from publishing by the compiler which inserts its own StoreStore barrier.
    dmb ish
    // Use a "dmb ish" fence here because if there are later loads of statics (e.g. class size),
    // they should happen-after the implicit initialization check.
    //
    // TODO: Remove dmb for class initialization checks (b/36692143)
.endif
    bx lr
.endm

// The common code for art_quick_alloc_object_*region_tlab
.macro GENERATE_ALLOC_OBJECT_RESOLVED_TLAB name, entrypoint, isInitialized
ENTRY \name
    // Fast path tlab allocation.
    // r0: type, r9: Thread::Current
    // r1, r2, r3, r12: free.
    ALLOC_OBJECT_RESOLVED_TLAB_FAST_PATH .Lslow_path\name, \isInitialized
.Lslow_path\name:
    SETUP_SAVE_REFS_ONLY_FRAME r2       // Save callee saves in case of GC.
    mov r1, r9                          // Pass Thread::Current.
    bl \entrypoint                      // (mirror::Class* klass, Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_region_tlab, artAllocObjectFromCodeResolvedRegionTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_region_tlab, artAllocObjectFromCodeInitializedRegionTLAB, /* isInitialized */ 1
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_resolved_tlab, artAllocObjectFromCodeResolvedTLAB, /* isInitialized */ 0
GENERATE_ALLOC_OBJECT_RESOLVED_TLAB art_quick_alloc_object_initialized_tlab, artAllocObjectFromCodeInitializedTLAB, /* isInitialized */ 1


// The common fast path code for art_quick_alloc_array_resolved/initialized_tlab
// and art_quick_alloc_array_resolved/initialized_region_tlab.
//
// r0: type r1: component_count r2: total_size r9: Thread::Current, r3, r12: free.
// Need to preserve r0 and r1 to the slow path.
.macro ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE slowPathLabel
    and r2, r2, #OBJECT_ALIGNMENT_MASK_TOGGLED  // Apply alignment mask
                                                // (addr + 7) & ~7.

                                                // Load thread_local_pos (r3) and
                                                // thread_local_end (r12) with ldrd.
                                                // Check constraints for ldrd.
#if !((THREAD_LOCAL_POS_OFFSET + 4 == THREAD_LOCAL_END_OFFSET) && (THREAD_LOCAL_POS_OFFSET % 8 == 0))
#error "Thread::thread_local_pos/end must be consecutive and are 8 byte aligned for performance"
#endif
    ldrd r3, r12, [r9, #THREAD_LOCAL_POS_OFFSET]
    sub r12, r12, r3                    // Compute the remaining buf size.
    cmp r2, r12                         // Check if the total_size fits.
    // The array class is always initialized here. Unlike new-instance,
    // this does not act as a double test.
    bhi \slowPathLabel
    // "Point of no slow path". Won't go to the slow path from here on. OK to clobber r0 and r1.
    add r2, r2, r3
    str r2, [r9, #THREAD_LOCAL_POS_OFFSET]      // Store new thread_local_pos.
    ldr r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]  // Increment thread_local_objects.
    add r2, r2, #1
    str r2, [r9, #THREAD_LOCAL_OBJECTS_OFFSET]
    POISON_HEAP_REF r0
    str r0, [r3, #MIRROR_OBJECT_CLASS_OFFSET]   // Store the class pointer.
    str r1, [r3, #MIRROR_ARRAY_LENGTH_OFFSET]   // Store the array length.
    mov r0, r3
    // new-array is special. The class is loaded and immediately goes to the Initialized state
    // before it is published. Therefore the only fence needed is for the publication of the object.
    // See ClassLinker::CreateArrayClass() for more details.

    // For publication of the new array, we don't need a 'dmb ishst' here.
    // The compiler generates 'dmb ishst' for all new-array insts.
    bx lr
.endm

// Array allocation entrypoint: \size_setup computes the total byte size into
// r2 (or jumps to the slow path), then the shared fast path above allocates.
.macro GENERATE_ALLOC_ARRAY_TLAB name, entrypoint, size_setup
ENTRY \name
    // Fast path array allocation for region tlab allocation.
    // r0: mirror::Class* type
    // r1: int32_t component_count
    // r9: thread
    // r2, r3, r12: free.
    \size_setup .Lslow_path\name
    ALLOC_ARRAY_TLAB_FAST_PATH_RESOLVED_WITH_SIZE .Lslow_path\name
.Lslow_path\name:
    // r0: mirror::Class* klass
    // r1: int32_t component_count
    // r2: Thread* self
    SETUP_SAVE_REFS_ONLY_FRAME r2       // save callee saves in case of GC
    mov r2, r9                          // pass Thread::Current
    bl \entrypoint
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
END \name
.endm

.macro COMPUTE_ARRAY_SIZE_UNKNOWN slow_path
    bkpt                                // We should never enter here.
                                        // Code below is for reference.
                                        // Possibly a large object, go slow.
                                        // Also does negative array size check.
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_WIDE_ARRAY_DATA_OFFSET) / 8)
    cmp r1, r2
    bhi \slow_path
                                        // Array classes are never finalizable
                                        // or uninitialized, no need to check.
    ldr r3, [r0, #MIRROR_CLASS_COMPONENT_TYPE_OFFSET]   // Load component type
    UNPOISON_HEAP_REF r3
    ldr r3, [r3, #MIRROR_CLASS_OBJECT_PRIMITIVE_TYPE_OFFSET]
    lsr r3, r3, #PRIMITIVE_TYPE_SIZE_SHIFT_SHIFT        // Component size shift is in high 16
                                                        // bits.
    lsl r2, r1, r3                      // Calculate data size
                                        // Add array data offset and alignment.
    add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
#if MIRROR_WIDE_ARRAY_DATA_OFFSET != MIRROR_INT_ARRAY_DATA_OFFSET + 4
#error Long array data offset must be 4 greater than int array data offset.
#endif

    add r3, r3, #1                      // Add 4 to the length only if the
                                        // component size shift is 3
                                        // (for 64 bit alignment).
    and r3, r3, #4
    add r2, r2, r3
.endm

.macro COMPUTE_ARRAY_SIZE_8 slow_path
                                        // Possibly a large object, go slow.
                                        // Also does negative array size check.
    movw r2, #(MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET)
    cmp r1, r2
    bhi \slow_path
                                        // Add array data offset and alignment.
    add r2, r1, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_16 slow_path
                                        // Possibly a large object, go slow.
                                        // Also does negative array size check.
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 2)
    cmp r1, r2
    bhi \slow_path
    lsl r2, r1, #1
                                        // Add array data offset and alignment.
    add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_32 slow_path
                                        // Possibly a large object, go slow.
                                        // Also does negative array size check.
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_INT_ARRAY_DATA_OFFSET) / 4)
    cmp r1, r2
    bhi \slow_path
    lsl r2, r1, #2
                                        // Add array data offset and alignment.
    add r2, r2, #(MIRROR_INT_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

.macro COMPUTE_ARRAY_SIZE_64 slow_path
                                        // Possibly a large object, go slow.
                                        // Also does negative array size check.
    movw r2, #((MIN_LARGE_OBJECT_THRESHOLD - MIRROR_LONG_ARRAY_DATA_OFFSET) / 8)
    cmp r1, r2
    bhi \slow_path
    lsl r2, r1, #3
                                        // Add array data offset and alignment.
    add r2, r2, #(MIRROR_WIDE_ARRAY_DATA_OFFSET + OBJECT_ALIGNMENT_MASK)
.endm

// TODO(ngeoffray): art_quick_alloc_array_resolved_region_tlab is not used for arm, remove
// the entrypoint once all backends have been updated to use the size variants.
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved32_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_region_tlab, artAllocArrayFromCodeResolvedRegionTLAB, COMPUTE_ARRAY_SIZE_64
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_UNKNOWN
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved8_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_8
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved16_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_16
GENERATE_ALLOC_ARRAY_TLAB
art_quick_alloc_array_resolved32_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_32
GENERATE_ALLOC_ARRAY_TLAB art_quick_alloc_array_resolved64_tlab, artAllocArrayFromCodeResolvedTLAB, COMPUTE_ARRAY_SIZE_64

/*
 * Called by managed code when the value in rSUSPEND has been decremented to 0.
 * Saves everything (so a GC triggered by the suspend check can visit and
 * update all registers) and calls into the runtime suspend check.
 */
    .extern artTestSuspendFromCode
ENTRY art_quick_test_suspend
    SETUP_SAVE_EVERYTHING_FRAME r0, RUNTIME_SAVE_EVERYTHING_FOR_SUSPEND_CHECK_METHOD_OFFSET  @ save everything for GC stack crawl
    mov r0, rSELF
    bl artTestSuspendFromCode           @ (Thread*)
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    bx lr
END art_quick_test_suspend

// Suspend check entry used by implicit suspend points; only a kSaveRefsOnly
// frame is needed here.
ENTRY art_quick_implicit_suspend
    mov r0, rSELF
    SETUP_SAVE_REFS_ONLY_FRAME r1       @ save callee saves for stack crawl
    bl artTestSuspendFromCode           @ (Thread*)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    bx lr
END art_quick_implicit_suspend

/*
 * Called by managed code that is attempting to call a method on a proxy class. On entry
 * r0 holds the proxy method and r1 holds the receiver; r2 and r3 may contain arguments. The
 * frame size of the invoked proxy method agrees with a ref and args callee save frame.
 */
    .extern artQuickProxyInvokeHandler
ENTRY art_quick_proxy_invoke_handler
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0
    mov r2, r9                          @ pass Thread::Current
    mov r3, sp                          @ pass SP
    blx artQuickProxyInvokeHandler      @ (Method* proxy method, receiver, Thread*, SP)
    ldr r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    // Tear down the callee-save frame. Skip arg registers.
    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz    r2, 1f                              @ exception pending? branch to deliver it
    vmov    d0, r0, r1                          @ store into fpr, for when it's a fpr return...
    bx      lr                                  @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_proxy_invoke_handler

    /*
     * Called to resolve an imt conflict.
     * r0 is the conflict ArtMethod.
     * r12 is a hidden argument that holds the target interface method's dex method index.
     *
     * Note that this stub writes to r0, r4, and r12.
     */
    .extern artLookupResolvedMethod
ENTRY art_quick_imt_conflict_trampoline
    push    {r1-r2}
    .cfi_adjust_cfa_offset (2 * 4)
    .cfi_rel_offset r1, 0
    .cfi_rel_offset r2, 4
    ldr     r4, [sp, #(2 * 4)]                  // Load referrer.
    ldr     r2, [r0, #ART_METHOD_JNI_OFFSET_32] // Load ImtConflictTable
    // Load the declaring class (without read barrier) and access flags (for obsolete method check).
    // The obsolete flag is set with suspended threads, so we do not need an acquire operation here.
#if ART_METHOD_ACCESS_FLAGS_OFFSET != ART_METHOD_DECLARING_CLASS_OFFSET + 4
#error "Expecting declaring class and access flags to be consecutive for LDRD."
#endif
    ldrd    r0, r1, [r4, #ART_METHOD_DECLARING_CLASS_OFFSET]
    // If the method is obsolete, just go through the dex cache miss slow path.
    // The shift drops the obsolete bit into the carry flag.
    lsrs    r1, #(ACC_OBSOLETE_METHOD_SHIFT + 1)
    bcs     .Limt_conflict_trampoline_dex_cache_miss
    ldr     r4, [r0, #MIRROR_CLASS_DEX_CACHE_OFFSET]   // Load the DexCache (without read barrier).
    UNPOISON_HEAP_REF r4
    ubfx    r1, r12, #0, #METHOD_DEX_CACHE_HASH_BITS   // Calculate DexCache method slot index.
    ldr     r4, [r4, #MIRROR_DEX_CACHE_RESOLVED_METHODS_OFFSET]  // Load the resolved methods.
    add     r4, r4, r1, lsl #(POINTER_SIZE_SHIFT + 1)  // Load DexCache method slot address.

    // FIXME: Configure the build to use the faster code when appropriate.
    //        Currently we fall back to the slower version.
#if HAS_ATOMIC_LDRD
    ldrd    r0, r1, [r4]
#else
    push    {r3}
    .cfi_adjust_cfa_offset 4
    .cfi_rel_offset r3, 0
.Limt_conflict_trampoline_retry_load:
    // Emulate an atomic 64-bit load with an LDREXD/STREXD retry loop.
    ldrexd  r0, r1, [r4]
    strexd  r3, r0, r1, [r4]
    cmp     r3, #0
    bne     .Limt_conflict_trampoline_retry_load
    pop     {r3}
    .cfi_adjust_cfa_offset -4
    .cfi_restore r3
#endif

    ldr     r4, [r2]                            // Load first entry in ImtConflictTable.
    cmp     r1, r12                             // Compare method index to see if we had a DexCache method hit.
    bne     .Limt_conflict_trampoline_dex_cache_miss
.Limt_table_iterate:
    cmp     r4, r0
    // Branch if found. Benchmarks have shown doing a branch here is better.
    beq     .Limt_table_found
    // If the entry is null, the interface method is not in the ImtConflictTable.
    cbz     r4, .Lconflict_trampoline
    // Iterate over the entries of the ImtConflictTable.
    ldr     r4, [r2, #(2 * __SIZEOF_POINTER__)]!
    b       .Limt_table_iterate
.Limt_table_found:
    // We successfully hit an entry in the table. Load the target method
    // and jump to it.
    ldr     r0, [r2, #__SIZEOF_POINTER__]
    .cfi_remember_state
    pop     {r1-r2}
    .cfi_adjust_cfa_offset -(2 * 4)
    .cfi_restore r1
    .cfi_restore r2
    ldr     pc, [r0, #ART_METHOD_QUICK_CODE_OFFSET_32]
    .cfi_restore_state
.Lconflict_trampoline:
    // Call the runtime stub to populate the ImtConflictTable and jump to the
    // resolved method.
    .cfi_remember_state
    pop     {r1-r2}
    .cfi_adjust_cfa_offset -(2 * 4)
    .cfi_restore r1
    .cfi_restore r2
    INVOKE_TRAMPOLINE_BODY artInvokeInterfaceTrampoline
    .cfi_restore_state
.Limt_conflict_trampoline_dex_cache_miss:
    // We're not creating a proper runtime method frame here,
    // artLookupResolvedMethod() is not allowed to walk the stack.

    // Save ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
    push    {r2-r4, lr}
    .cfi_adjust_cfa_offset (4 * 4)
    .cfi_rel_offset r3, 4
    .cfi_rel_offset lr, 12
    // Save FPR args.
    vpush   {d0-d7}
    .cfi_adjust_cfa_offset (8 * 8)

    mov     r0, ip                              // Pass method index.
    ldr     r1, [sp, #(8 * 8 + 6 * 4)]          // Pass referrer.
    bl      artLookupResolvedMethod             // (uint32_t method_index, ArtMethod* referrer)

    // Restore FPR args.
    vpop    {d0-d7}
    .cfi_adjust_cfa_offset -(8 * 8)
    // Restore ImtConflictTable (r2), remaining arg (r3), first entry (r4), return address (lr).
    pop     {r2-r4, lr}
    .cfi_adjust_cfa_offset -(4 * 4)
    .cfi_restore r3
    .cfi_restore lr

    cmp     r0, #0                              // If the method wasn't resolved,
    beq     .Lconflict_trampoline               //   skip the lookup and go to artInvokeInterfaceTrampoline().
    b       .Limt_table_iterate
END art_quick_imt_conflict_trampoline

    .extern artQuickResolutionTrampoline
ENTRY art_quick_resolution_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
    mov     r2, r9                              @ pass Thread::Current
    mov     r3, sp                              @ pass SP
    blx     artQuickResolutionTrampoline        @ (Method* called, receiver, Thread*, SP)
    cbz     r0, 1f                              @ is code pointer null? goto exception
    mov     r12, r0
    ldr     r0, [sp, #0]                        @ load resolved method in r0
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    bx      r12                                 @ tail-call into actual code
1:
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_resolution_trampoline

    /*
     * Called to do a generic JNI down-call
     */
ENTRY art_quick_generic_jni_trampoline
    SETUP_SAVE_REFS_AND_ARGS_FRAME_WITH_METHOD_IN_R0

    // Save rSELF
    mov     r11, rSELF
    // Save SP, so we can have static CFI info. r10 is saved in ref_and_args.
    mov     r10, sp
    .cfi_def_cfa_register r10

    // Reserve scratch space for the native call setup done by the C helper below.
    sub     sp, sp, #5120

    // prepare for artQuickGenericJniTrampoline call
    // (Thread*, SP)
    //    r0    r1   <= C calling convention
    //  rSELF   r10  <= where they are

    mov     r0, rSELF                           // Thread*
    mov     r1, r10
    blx     artQuickGenericJniTrampoline        // (Thread*, sp)

    // The C call will have registered the complete save-frame on success.
    // The result of the call is:
    // r0: pointer to native code, 0 on error.
    // r1: pointer to the bottom of the used area of the alloca, can restore stack till there.

    // Check for error = 0.
    cbz     r0, .Lexception_in_native

    // Release part of the alloca.
    mov     sp, r1

    // Save the code pointer
    mov     r12, r0

    // Load parameters from frame into registers.
    pop     {r0-r3}

    // Softfloat.
    // TODO: Change to hardfloat when supported.

    blx     r12                                 // native call.

    // result sign extension is handled in C code
    // prepare for artQuickGenericJniEndTrampoline call
    // (Thread*, result, result_f)
    //    r0      r2,r3    stack       <= C calling convention
    //    r11     r0,r1    r0,r1       <= where they are
    sub     sp, sp, #8                          // Stack alignment.

    push    {r0-r1}                             // pass result_f on the stack
    mov     r3, r1
    mov     r2, r0
    mov     r0, r11

    blx     artQuickGenericJniEndTrampoline

    // Restore self pointer.
    mov     r9, r11

    // Pending exceptions possible.
    ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    cbnz    r2, .Lexception_in_native

    // Tear down the alloca.
    mov     sp, r10
    .cfi_def_cfa_register sp

    // Tear down the callee-save frame. Skip arg registers.
    add     sp, #FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY
    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER

    // store into fpr, for when it's a fpr return...
    vmov    d0, r0, r1
    bx      lr                                  // ret
    // Undo the unwinding information from above since it doesn't apply below.
    .cfi_def_cfa_register r10
    .cfi_adjust_cfa_offset FRAME_SIZE_SAVE_REFS_AND_ARGS-FRAME_SIZE_SAVE_REFS_ONLY

.Lexception_in_native:
    ldr     ip, [r9, #THREAD_TOP_QUICK_FRAME_OFFSET]
    add     ip, ip, #-1  // Remove the GenericJNI tag. ADD/SUB writing directly to SP is UNPREDICTABLE.
    mov     sp, ip
    .cfi_def_cfa_register sp
    # This will create a new save-all frame, required by the runtime.
    DELIVER_PENDING_EXCEPTION
END art_quick_generic_jni_trampoline

    .extern artQuickToInterpreterBridge
ENTRY art_quick_to_interpreter_bridge
    SETUP_SAVE_REFS_AND_ARGS_FRAME r1
    mov     r1, r9                              @ pass Thread::Current
    mov     r2, sp                              @ pass SP
    blx     artQuickToInterpreterBridge         @ (Method* method, Thread*, SP)
    ldr     r2, [r9, #THREAD_EXCEPTION_OFFSET]  @ load Thread::Current()->exception_
    // Tear down the callee-save frame. Skip arg registers.
    add     sp, #(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
    .cfi_adjust_cfa_offset -(FRAME_SIZE_SAVE_REFS_AND_ARGS - FRAME_SIZE_SAVE_REFS_ONLY)
    RESTORE_SAVE_REFS_ONLY_FRAME
    REFRESH_MARKING_REGISTER
    cbnz    r2, 1f                              @ exception pending? branch to deliver it
    vmov    d0, r0, r1                          @ store into fpr, for when it's a fpr return...
    bx      lr                                  @ return on success
1:
    DELIVER_PENDING_EXCEPTION
END art_quick_to_interpreter_bridge

    /*
     * Called to attempt to execute an obsolete method.
     */
ONE_ARG_RUNTIME_EXCEPTION art_invoke_obsolete_method_stub, artInvokeObsoleteMethod

    /*
     * Routine that intercepts method calls and returns.
     */
    .extern artInstrumentationMethodEntryFromCode
    .extern artInstrumentationMethodExitFromCode
ENTRY art_quick_instrumentation_entry
    @ Make stack crawlable and clobber r2 and r3 (post saving)
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
    @ preserve r0 (not normally an arg) knowing there is a spare slot in kSaveRefsAndArgs.
    str     r0, [sp, #4]
    mov     r2, r9                              @ pass Thread::Current
    mov     r3, sp                              @ pass SP
    blx     artInstrumentationMethodEntryFromCode  @ (Method*, Object*, Thread*, SP)
    cbz     r0, .Ldeliver_instrumentation_entry_exception
                                                @ Deliver exception if we got nullptr as function.
    mov     r12, r0                             @ r12 holds reference to code
    ldr     r0, [sp, #4]                        @ restore r0
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    adr     lr, art_quick_instrumentation_exit + /* thumb mode */ 1
                                                @ load art_quick_instrumentation_exit into lr in thumb mode
    REFRESH_MARKING_REGISTER
    bx      r12                                 @ call method with lr set to art_quick_instrumentation_exit
.Ldeliver_instrumentation_entry_exception:
    @ Deliver exception for art_quick_instrumentation_entry placed after
    @ art_quick_instrumentation_exit so that the fallthrough works.
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    DELIVER_PENDING_EXCEPTION
END art_quick_instrumentation_entry

ENTRY art_quick_instrumentation_exit
    mov     lr, #0                              @ link register is to here, so clobber with 0 for later checks
    SETUP_SAVE_EVERYTHING_FRAME r2

    add     r3, sp, #8                          @ store fpr_res pointer, in kSaveEverything frame
    add     r2, sp, #136                        @ store gpr_res pointer, in kSaveEverything frame
    mov     r1, sp                              @ pass SP
    mov     r0, r9                              @ pass Thread::Current
    blx     artInstrumentationMethodExitFromCode  @ (Thread*, SP, gpr_res*, fpr_res*)

    cbz     r0, .Ldo_deliver_instrumentation_exception
                                                @ Deliver exception if we got nullptr as function.
    cbnz    r1, .Ldeoptimize
    // Normal return.
    str     r0, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
                                                @ Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    REFRESH_MARKING_REGISTER
    bx      lr
.Ldo_deliver_instrumentation_exception:
    DELIVER_PENDING_EXCEPTION_FRAME_READY
.Ldeoptimize:
    str     r1, [sp, #FRAME_SIZE_SAVE_EVERYTHING - 4]
                                                @ Set return pc.
    RESTORE_SAVE_EVERYTHING_FRAME
    // Jump to art_quick_deoptimize.
    b       art_quick_deoptimize
END art_quick_instrumentation_exit

    /*
     * Instrumentation has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the upcall with a special exception of -1.
     */
    .extern artDeoptimize
ENTRY art_quick_deoptimize
    SETUP_SAVE_EVERYTHING_FRAME r0
    mov     r0, r9                              @ pass Thread::Current
    blx     artDeoptimize                       @ (Thread*)
END art_quick_deoptimize

    /*
     * Compiled code has requested that we deoptimize into the interpreter. The deoptimization
     * will long jump to the interpreter bridge.
     */
    .extern artDeoptimizeFromCompiledCode
ENTRY art_quick_deoptimize_from_compiled_code
    SETUP_SAVE_EVERYTHING_FRAME r1
    mov     r1, r9                              @ pass Thread::Current
    blx     artDeoptimizeFromCompiledCode       @ (DeoptimizationKind, Thread*)
END art_quick_deoptimize_from_compiled_code

    /*
     * Signed 64-bit integer multiply.
     *
     * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
     *        WX
     *      x YZ
     *  --------
     *     ZW ZX
     *  YW YX
     *
     * The low word of the result holds ZX, the high word holds
     * (ZW+YX) + (the high overflow from ZX). YW doesn't matter because
     * it doesn't fit in the low 64 bits.
     *
     * Unlike most ARM math operations, multiply instructions have
     * restrictions on using the same register more than once (Rd and Rm
     * cannot be the same).
     */
    /* mul-long vAA, vBB, vCC */
ENTRY art_quick_mul_long
    push    {r9-r10}
    .cfi_adjust_cfa_offset 8
    .cfi_rel_offset r9, 0
    .cfi_rel_offset r10, 4
    mul     ip, r2, r1                          @ ip<- ZxW
    umull   r9, r10, r2, r0                     @ r9/r10 <- ZxX
    mla     r2, r0, r3, ip                      @ r2<- YxX + (ZxW)
    add     r10, r2, r10                        @ r10<- r10 + low(ZxW + (YxX))
    mov     r0, r9
    mov     r1, r10
    pop     {r9-r10}
    .cfi_adjust_cfa_offset -8
    .cfi_restore r9
    .cfi_restore r10
    bx      lr
END art_quick_mul_long

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* shl-long vAA, vBB, vCC */
ARM_ENTRY art_quick_shl_long            @ ARM code as thumb code requires spills
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r1, r1, asl r2              @ r1<- r1 << r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r1, r1, r0, lsr r3          @ r1<- r1 | (r0 >> (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r1, r0, asl ip              @ if r2 >= 32, r1<- r0 << (r2-32)
    mov     r0, r0, asl r2              @ r0<- r0 << r2
    bx      lr
END art_quick_shl_long

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* shr-long vAA, vBB, vCC */
ARM_ENTRY art_quick_shr_long            @ ARM code as thumb code requires spills
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @ r0<- r0 >> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r0, r1, asr ip              @ if r2 >= 32, r0<- r1 >> (r2-32)
    mov     r1, r1, asr r2              @ r1<- r1 >> r2
    bx      lr
END art_quick_shr_long

    /*
     * Long integer shift.  This is different from the generic 32/64-bit
     * binary operations because vAA/vBB are 64-bit but vCC (the shift
     * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
     * 6 bits.
     * On entry:
     *   r0: low word
     *   r1: high word
     *   r2: shift count
     */
    /* ushr-long vAA, vBB, vCC */
ARM_ENTRY art_quick_ushr_long           @ ARM code as thumb code requires spills
    and     r2, r2, #63                 @ r2<- r2 & 0x3f
    mov     r0, r0, lsr r2              @ r0<- r0 >>> r2
    rsb     r3, r2, #32                 @ r3<- 32 - r2
    orr     r0, r0, r1, asl r3          @ r0<- r0 | (r1 << (32-r2))
    subs    ip, r2, #32                 @ ip<- r2 - 32
    movpl   r0, r1, lsr ip              @ if r2 >= 32, r0<- r1 >>> (r2-32)
    mov     r1, r1, lsr r2              @ r1<- r1 >>> r2
    bx      lr
END art_quick_ushr_long

    /*
     * String's indexOf.
     *
     * On entry:
     *    r0:   string object (known non-null)
     *    r1:   char to match (known <= 0xFFFF)
     *    r2:   Starting offset in string data
     */
ENTRY art_quick_indexof
    push    {r4, r10-r11, lr}           @ 4 words of callee saves
    .cfi_adjust_cfa_offset 16
    .cfi_rel_offset r4, 0
    .cfi_rel_offset r10, 4
    .cfi_rel_offset r11, 8
    .cfi_rel_offset lr, 12
#if (STRING_COMPRESSION_FEATURE)
    ldr     r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
#else
    ldr     r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
#endif
    add     r0, #MIRROR_STRING_VALUE_OFFSET
#if (STRING_COMPRESSION_FEATURE)
    /* r4 count (with flag) and r3 holds actual length */
    lsr     r3, r4, #1
#endif
    /* Clamp start to [0..count] */
    cmp     r2, #0
    it      lt
    movlt   r2, #0
    cmp     r2, r3
    it      gt
    movgt   r2, r3

    /* Save a copy in r12 to later compute result */
    mov     r12, r0

    /* Build pointer to start of data to compare and pre-bias */
#if (STRING_COMPRESSION_FEATURE)
    lsrs    r4, r4, #1                  @ shift out the compression flag into the carry
    bcc     .Lstring_indexof_compressed
#endif
    add     r0, r0, r2, lsl #1
    sub     r0, #2

    /* Compute iteration count */
    sub     r2, r3, r2

    /*
     * At this point we have:
     *   r0: start of data to test
     *   r1: char to compare
     *   r2: iteration count
     *   r4: compression style (used temporarily)
     *   r12: original start of string data
     *   r3, r4, r10, r11 available for loading string data
     */

    subs    r2, #4
    blt     .Lindexof_remainder

.Lindexof_loop4:
    @ Unrolled by 4: load and compare four chars per iteration.
    ldrh    r3, [r0, #2]!
    ldrh    r4, [r0, #2]!
    ldrh    r10, [r0, #2]!
    ldrh    r11, [r0, #2]!
    cmp     r3, r1
    beq     .Lmatch_0
    cmp     r4, r1
    beq     .Lmatch_1
    cmp     r10, r1
    beq     .Lmatch_2
    cmp     r11, r1
    beq     .Lmatch_3
    subs    r2, #4
    bge     .Lindexof_loop4

.Lindexof_remainder:
    adds    r2, #4
    beq     .Lindexof_nomatch

.Lindexof_loop1:
    ldrh    r3, [r0, #2]!
    cmp     r3, r1
    beq     .Lmatch_3
    subs    r2, #1
    bne     .Lindexof_loop1

.Lindexof_nomatch:
    mov     r0, #-1
    pop     {r4, r10-r11, pc}

    @ Each match label rewinds r0 to the matching char, then converts the
    @ byte offset from the string start (r12) into a char index.
.Lmatch_0:
    sub     r0, #6
    sub     r0, r12
    asr     r0, r0, #1
    pop     {r4, r10-r11, pc}
.Lmatch_1:
    sub     r0, #4
    sub     r0, r12
    asr     r0, r0, #1
    pop     {r4, r10-r11, pc}
.Lmatch_2:
    sub     r0, #2
    sub     r0, r12
    asr     r0, r0, #1
    pop     {r4, r10-r11, pc}
.Lmatch_3:
    sub     r0, r12
    asr     r0, r0, #1
    pop     {r4, r10-r11, pc}
#if (STRING_COMPRESSION_FEATURE)
.Lstring_indexof_compressed:
    @ Compressed (8-bit) string data: byte-wise scan.
    add     r0, r0, r2
    sub     r0, #1
    sub     r2, r3, r2
.Lstring_indexof_compressed_loop:
    subs    r2, #1
    blt     .Lindexof_nomatch
    ldrb    r3, [r0, #1]!
    cmp     r3, r1
    beq     .Lstring_indexof_compressed_matched
    b       .Lstring_indexof_compressed_loop
.Lstring_indexof_compressed_matched:
    sub     r0, r12
    pop     {r4, r10-r11, pc}
#endif
END art_quick_indexof

    /* Assembly routines used to handle ABI differences.
 */

    /* double fmod(double a, double b) */
    .extern fmod
ENTRY art_quick_fmod
    push    {lr}
    .cfi_adjust_cfa_offset 4
    .cfi_rel_offset lr, 0
    sub     sp, #4                      @ keep the stack 8-byte aligned for the AAPCS call
    .cfi_adjust_cfa_offset 4
    vmov    r0, r1, d0                  @ move args from FP regs to core regs (softfloat ABI)
    vmov    r2, r3, d1
    bl      fmod
    vmov    d0, r0, r1                  @ move result back to d0
    add     sp, #4
    .cfi_adjust_cfa_offset -4
    pop     {pc}
END art_quick_fmod

    /* float fmodf(float a, float b) */
    .extern fmodf
ENTRY art_quick_fmodf
    push    {lr}
    .cfi_adjust_cfa_offset 4
    .cfi_rel_offset lr, 0
    sub     sp, #4                      @ keep the stack 8-byte aligned for the AAPCS call
    .cfi_adjust_cfa_offset 4
    vmov    r0, r1, d0                  @ move both float args (s0, s1) to core regs
    bl      fmodf
    vmov    s0, r0
    add     sp, #4
    .cfi_adjust_cfa_offset -4
    pop     {pc}
END art_quick_fmodf

    /* int64_t art_d2l(double d) */
    .extern art_d2l
ENTRY art_quick_d2l
    vmov    r0, r1, d0
    b       art_d2l                     @ tail call; result comes back in r0/r1
END art_quick_d2l

    /* int64_t art_f2l(float f) */
    .extern art_f2l
ENTRY art_quick_f2l
    vmov    r0, s0
    b       art_f2l                     @ tail call; result comes back in r0/r1
END art_quick_f2l

    /* float art_l2f(int64_t l) */
    .extern art_l2f
ENTRY art_quick_l2f
    push    {lr}
    .cfi_adjust_cfa_offset 4
    .cfi_rel_offset lr, 0
    sub     sp, #4                      @ keep the stack 8-byte aligned for the AAPCS call
    .cfi_adjust_cfa_offset 4
    bl      art_l2f                     @ arg already in r0/r1
    vmov    s0, r0
    add     sp, #4
    .cfi_adjust_cfa_offset -4
    pop     {pc}
END art_quick_l2f

// Emit a CBZ only when \reg matches \reg_if (CBZ can only encode low registers).
.macro CONDITIONAL_CBZ reg, reg_if, dest
.ifc \reg, \reg_if
    cbz     \reg, \dest
.endif
.endm

// Emit a CMP/BEQ pair only when \reg matches \reg_if (for high registers).
.macro CONDITIONAL_CMPBZ reg, reg_if, dest
.ifc \reg, \reg_if
    cmp     \reg, #0
    beq     \dest
.endif
.endm

// Use CBZ if the register is in {r0-r7}, otherwise compare and branch.
.macro SMART_CBZ reg, dest
    CONDITIONAL_CBZ \reg, r0, \dest
    CONDITIONAL_CBZ \reg, r1, \dest
    CONDITIONAL_CBZ \reg, r2, \dest
    CONDITIONAL_CBZ \reg, r3, \dest
    CONDITIONAL_CBZ \reg, r4, \dest
    CONDITIONAL_CBZ \reg, r5, \dest
    CONDITIONAL_CBZ \reg, r6, \dest
    CONDITIONAL_CBZ \reg, r7, \dest
    CONDITIONAL_CMPBZ \reg, r8, \dest
    CONDITIONAL_CMPBZ \reg, r9, \dest
    CONDITIONAL_CMPBZ \reg, r10, \dest
    CONDITIONAL_CMPBZ \reg, r11, \dest
    CONDITIONAL_CMPBZ \reg, r12, \dest
    CONDITIONAL_CMPBZ \reg, r13, \dest
    CONDITIONAL_CMPBZ \reg, r14, \dest
    CONDITIONAL_CMPBZ \reg, r15, \dest
.endm

    /*
     * Create a function `name` calling the ReadBarrier::Mark routine,
     * getting its argument and returning its result through register
     * `reg`, saving and restoring all caller-save registers.
     *
     * IP is clobbered; `reg` must not be IP.
     *
     * If `reg` is different from `r0`, the generated function follows a
     * non-standard runtime calling convention:
     * - register `reg` is used to pass the (sole) argument of this
     *   function (instead of R0);
     * - register `reg` is used to return the result of this function
     *   (instead of R0);
     * - R0 is treated like a normal (non-argument) caller-save register;
     * - everything else is the same as in the standard runtime calling
     *   convention (e.g. standard callee-save registers are preserved).
     */
.macro READ_BARRIER_MARK_REG name, reg
ENTRY \name
    // Null check so that we can load the lock word.
    SMART_CBZ \reg, .Lret_rb_\name
    // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
    ldr     ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tst     ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
    beq     .Lnot_marked_rb_\name
    // Already marked, return right away.
.Lret_rb_\name:
    bx      lr

.Lnot_marked_rb_\name:
    // Test that both the forwarding state bits are 1.
#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
    // the highest bits and the "forwarding address" state to have all bits set.
#error "Unexpected lock word state shift or forwarding address state value."
#endif
    cmp     ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
    bhs     .Lret_forwarding_address\name

.Lslow_rb_\name:
    // Save IP: The kSaveEverything entrypoint art_quick_resolve_string used to
    // make a tail call here. Currently, it serves only for stack alignment but
    // we may reintroduce kSaveEverything calls here in the future.
    push    {r0-r4, r9, ip, lr}         @ save return address, core caller-save registers and ip
    .cfi_adjust_cfa_offset 32
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r4, 16
    .cfi_rel_offset r9, 20
    .cfi_rel_offset ip, 24
    .cfi_rel_offset lr, 28

    .ifnc \reg, r0
    mov     r0, \reg                    @ pass arg1 - obj from `reg`
    .endif

    vpush   {s0-s15}                    @ save floating-point caller-save registers
    .cfi_adjust_cfa_offset 64
    bl      artReadBarrierMark          @ r0 <- artReadBarrierMark(obj)
    vpop    {s0-s15}                    @ restore floating-point registers
    .cfi_adjust_cfa_offset -64

    // Save the result to the stack slot of `reg` (so the POP below restores it)
    // or move it directly if `reg` is not covered by the PUSH above.
    .ifc \reg, r0
    str     r0, [sp, #0]
    .else
    .ifc \reg, r1
    str     r0, [sp, #4]
    .else
    .ifc \reg, r2
    str     r0, [sp, #8]
    .else
    .ifc \reg, r3
    str     r0, [sp, #12]
    .else
    .ifc \reg, r4
    str     r0, [sp, #16]
    .else
    .ifc \reg, r9
    str     r0, [sp, #20]
    .else
    mov     \reg, r0
    .endif
    .endif
    .endif
    .endif
    .endif
    .endif

    pop     {r0-r4, r9, ip, lr}         @ restore caller-save registers
    .cfi_adjust_cfa_offset -32
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r4
    .cfi_restore r9
    .cfi_restore ip
    .cfi_restore lr
    bx      lr
.Lret_forwarding_address\name:
    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl     \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    bx      lr
END \name
.endm

READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg00, r0
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg01, r1
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg02, r2
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg03, r3
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg04, r4
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg05, r5
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg06, r6
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg07, r7
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg08, r8
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg09, r9
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg10, r10
READ_BARRIER_MARK_REG art_quick_read_barrier_mark_reg11, r11

// Helper macros for Baker CC read barrier mark introspection (BRBMI).
// Expand \macro_for_register for r0-r11, substituting \macro_for_reserved_register for R4.
.macro BRBMI_FOR_12_REGISTERS macro_for_register, macro_for_reserved_register
    \macro_for_register r0
    \macro_for_register r1
    \macro_for_register r2
    \macro_for_register r3
    \macro_for_reserved_register  // R4 is reserved for the entrypoint address.
    \macro_for_register r5
    \macro_for_register r6
    \macro_for_register r7
    \macro_for_register r8
    \macro_for_register r9
    \macro_for_register r10
    \macro_for_register r11
.endm

// Expand for all 16 register encodings; IP, SP, LR and PC are reserved.
.macro BRBMI_FOR_REGISTERS macro_for_register, macro_for_reserved_register
    BRBMI_FOR_12_REGISTERS \macro_for_register, \macro_for_reserved_register
    \macro_for_reserved_register  // IP is reserved.
    \macro_for_reserved_register  // SP is reserved.
    \macro_for_reserved_register  // LR is reserved.
    \macro_for_reserved_register  // PC is reserved.
.endm

// One return-switch case: move the marked reference from IP into \reg and return.
.macro BRBMI_RETURN_SWITCH_CASE reg
.Lmark_introspection_return_switch_case_\reg:
    mov     \reg, ip
    bx      lr
.endm

// Return-switch case for reserved registers: trap.
.macro BRBMI_BAD_RETURN_SWITCH_CASE
.Lmark_introspection_return_switch_case_bad:
    BRBMI_BKPT_FILL_4B
.endm

// TBB table entry (half-word-scaled offset) for a valid register case.
.macro BRBMI_RETURN_SWITCH_CASE_OFFSET reg
    .byte   (.Lmark_introspection_return_switch_case_\reg - .Lmark_introspection_return_table) / 2
.endm

// TBB table entry for a reserved-register case.
.macro BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
    .byte   (.Lmark_introspection_return_switch_case_bad - .Lmark_introspection_return_table) / 2
.endm

#if BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET != BAKER_MARK_INTROSPECTION_ARRAY_LDR_OFFSET
#error "Array and field introspection code sharing requires same LDR offset."
#endif
// Array switch case: load the reference indexed by \index_reg and jump to the
// main entrypoint. Each expansion must occupy exactly 8 bytes (see .balign).
.macro BRBMI_ARRAY_LOAD index_reg
    ldr     ip, [ip, \index_reg, lsl #2]               // 4 bytes.
    b       art_quick_read_barrier_mark_introspection  // Should be 2 bytes, encoding T2.
    .balign 8                                          // Add padding to 8 bytes.
.endm

.macro BRBMI_BKPT_FILL_4B
    bkpt    0
    bkpt    0
.endm

.macro BRBMI_BKPT_FILL_8B
    BRBMI_BKPT_FILL_4B
    BRBMI_BKPT_FILL_4B
.endm

.macro BRBMI_RUNTIME_CALL
    // Note: This macro generates exactly 22 bytes of code. The core register
    // PUSH and the MOVs are 16-bit instructions, the rest is 32-bit instructions.

    push    {r0-r3, r7, lr}             // Save return address and caller-save registers.
    .cfi_adjust_cfa_offset 24
    .cfi_rel_offset r0, 0
    .cfi_rel_offset r1, 4
    .cfi_rel_offset r2, 8
    .cfi_rel_offset r3, 12
    .cfi_rel_offset r7, 16
    .cfi_rel_offset lr, 20

    mov     r0, ip                      // Pass the reference.
    vpush   {s0-s15}                    // save floating-point caller-save registers
    .cfi_adjust_cfa_offset 64
    bl      artReadBarrierMark          // r0 <- artReadBarrierMark(obj)
    vpop    {s0-s15}                    // restore floating-point registers
    .cfi_adjust_cfa_offset -64
    mov     ip, r0                      // Move reference to ip in preparation for return switch.

    pop     {r0-r3, r7, lr}             // Restore registers.
    .cfi_adjust_cfa_offset -24
    .cfi_restore r0
    .cfi_restore r1
    .cfi_restore r2
    .cfi_restore r3
    .cfi_restore r7
    .cfi_restore lr
.endm

.macro BRBMI_CHECK_NULL_AND_MARKED label_suffix
    // If reference is null, just return it in the right register.
    cmp     ip, #0
    beq     .Lmark_introspection_return\label_suffix
    // Use R4 as temp and check the mark bit of the reference.
    ldr     r4, [ip, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
    tst     r4, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
    beq     .Lmark_introspection_unmarked\label_suffix
.Lmark_introspection_return\label_suffix:
.endm

.macro BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK label_suffix
.Lmark_introspection_unmarked\label_suffix:
    // Check if the top two bits are one, if this is the case it is a forwarding address.
#if (LOCK_WORD_STATE_SHIFT != 30) || (LOCK_WORD_STATE_FORWARDING_ADDRESS != 3)
    // To use "CMP ip, #modified-immediate; BHS", we need the lock word state in
    // the highest bits and the "forwarding address" state to have all bits set.
#error "Unexpected lock word state shift or forwarding address state value."
#endif
    cmp     r4, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
    bhs     .Lmark_introspection_forwarding_address\label_suffix
.endm

.macro BRBMI_EXTRACT_FORWARDING_ADDRESS label_suffix
.Lmark_introspection_forwarding_address\label_suffix:
    // Note: This macro generates exactly 22 bytes of code, the branch is near.

    // Shift left by the forwarding address shift. This clears out the state bits since they are
    // in the top 2 bits of the lock word.
    lsl     ip, r4, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
    b       .Lmark_introspection_return\label_suffix
.endm

.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_wide ldr_offset
    // Load the half of the instruction that contains Rt. Adjust for the thumb state in LR.
    ldrh    r4, [lr, #(-1 + \ldr_offset + 2)]
.endm

.macro BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow ldr_offset
    // Load the 16-bit instruction. Adjust for the thumb state in LR.
    ldrh    r4, [lr, #(-1 + \ldr_offset)]
.endm

// Emit the GC-root entrypoint plus the forwarding-address extraction and the
// slow path for one LDR encoding; the layout is byte-exact (see comments below).
.macro BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH gc_root_ldr_offset, label_suffix
    .balign 64
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_gc_roots\label_suffix, #function
    .hidden art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
    .global art_quick_read_barrier_mark_introspection_gc_roots\label_suffix
art_quick_read_barrier_mark_introspection_gc_roots\label_suffix:
    BRBMI_RUNTIME_CALL
    // Load the LDR (or the half of it) that contains Rt.
    BRBMI_LOAD_RETURN_REG_FROM_CODE\label_suffix \gc_root_ldr_offset
    b       .Lmark_introspection_extract_register_and_return\label_suffix
    // We've used 28 bytes since the "gc_roots" entrypoint (22 bytes for
    // BRBMI_RUNTIME_CALL, 4 bytes for LDRH and 2 bytes for the branch). Squeeze
    // the 6 byte forwarding address extraction here across the 32-byte boundary.
    BRBMI_EXTRACT_FORWARDING_ADDRESS \label_suffix
    // And the slow path taking exactly 30 bytes (6 bytes for the forwarding
    // address check, 22 bytes for BRBMI_RUNTIME_CALL and 2 bytes for the near
    // branch) shall take the rest of the 32-byte section (within a cache line).
    BRBMI_UNMARKED_FORWARDING_ADDRESS_CHECK \label_suffix
    BRBMI_RUNTIME_CALL
    b       .Lmark_introspection_return\label_suffix
.endm

    /*
     * Use introspection to load a reference from the same address as the LDR
     * instruction in generated code would load (unless loaded by the thunk,
     * see below), call ReadBarrier::Mark() with that reference if needed
     * and return it in the same register as the LDR instruction would load.
     *
     * The entrypoint is called through a thunk that differs across load kinds.
     * For field and array loads the LDR instruction in generated code follows
     * the branch to the thunk, i.e. the LDR is (ignoring the heap poisoning)
     * at [LR, #(-4 - 1)] (encoding T3) or [LR, #(-2 - 1)] (encoding T1) where
     * the -1 is an adjustment for the Thumb mode bit in LR, and the thunk
     * knows the holder and performs the gray bit check, returning to the LDR
     * instruction if the object is not gray, so this entrypoint no longer
     * needs to know anything about the holder. For GC root loads, the LDR
     * instruction in generated code precedes the branch to the thunk, i.e.
 * the
 * LDR is at [LR, #(-8 - 1)] (encoding T3) or [LR, #(-6 - 1)] (encoding T1)
 * where the -1 is again the Thumb mode bit adjustment, and the thunk does
 * not do the gray bit check.
 *
 * For field accesses and array loads with a constant index the thunk loads
 * the reference into IP using introspection and calls the main entrypoint,
 * art_quick_read_barrier_mark_introspection. With heap poisoning enabled,
 * the passed reference is poisoned.
 *
 * For array accesses with non-constant index, the thunk inserts the bits
 * 0-5 of the LDR instruction to the entrypoint address, effectively
 * calculating a switch case label based on the index register (bits 0-3)
 * and adding an extra offset (bits 4-5 hold the shift which is always 2
 * for reference loads) to differentiate from the main entrypoint, then
 * moves the base register to IP and jumps to the switch case. Therefore
 * we need to align the main entrypoint to 512 bytes, accounting for
 * a 256-byte offset followed by 16 array entrypoints starting at
 * art_quick_read_barrier_mark_introspection_arrays, each containing an LDR
 * (register) and a branch to the main entrypoint.
 *
 * For GC root accesses we cannot use the main entrypoint because of the
 * different offset where the LDR instruction in generated code is located.
 * (And even with heap poisoning enabled, GC roots are not poisoned.)
 * To re-use the same entrypoint pointer in generated code, we make sure
 * that the gc root entrypoint (a copy of the entrypoint with a different
 * offset for introspection loads) is located at a known offset (128 bytes,
 * or BAKER_MARK_INTROSPECTION_GC_ROOT_ENTRYPOINT_OFFSET) from the main
 * entrypoint and the GC root thunk adjusts the entrypoint pointer, moves
 * the root register to IP and jumps to the customized entrypoint,
 * art_quick_read_barrier_mark_introspection_gc_roots. The thunk also
 * performs all the fast-path checks, so we need just the slow path.
 *
 * The code structure is
 *   art_quick_read_barrier_mark_introspection:
 *     Up to 32 bytes code for main entrypoint fast-path code for fields
 *     (and array elements with constant offset) with LDR encoding T3;
 *     jumps to the switch in the "narrow" entrypoint.
 *     Padding to 32 bytes if needed.
 *   art_quick_read_barrier_mark_introspection_narrow:
 *     Up to 48 bytes code for fast path code for fields (and array
 *     elements with constant offset) with LDR encoding T1, ending in the
 *     return switch instruction TBB and the table with switch offsets.
 *     Padding to 80 bytes if needed.
 *   .Lmark_introspection_return_switch_case_r0:
 *     Exactly 48 bytes of code for the return switch cases (12 cases,
 *     including BKPT for the reserved registers).
 *     Ends at 128 bytes total.
 *   art_quick_read_barrier_mark_introspection_gc_roots_wide:
 *     GC root entrypoint code for LDR encoding T3 (28 bytes).
 *     Forwarding address extraction for LDR encoding T3 (6 bytes).
 *     Slow path for main entrypoint for LDR encoding T3 (30 bytes).
 *     Ends at 192 bytes total.
 *   art_quick_read_barrier_mark_introspection_gc_roots_narrow:
 *     GC root entrypoint code for LDR encoding T1 (28 bytes).
 *     Forwarding address extraction for LDR encoding T1 (6 bytes).
 *     Slow path for main entrypoint for LDR encoding T1 (30 bytes).
 *     Ends at 256 bytes total.
 *   art_quick_read_barrier_mark_introspection_arrays:
 *     Exactly 128 bytes for array load switch cases (16x2 instructions).
 */
    .balign 512
ENTRY art_quick_read_barrier_mark_introspection
    // At this point, IP contains the reference, R4 can be freely used.
    // (R4 is reserved for the entrypoint address.)
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
    // Check for null or marked, lock word is loaded into R4.
    // (The lock word must be in R4: the forwarding-address and state checks
    // in the BRBMI_* macros above all read R4, same as the _narrow path.)
    BRBMI_CHECK_NULL_AND_MARKED _wide
    // Load the half of the instruction that contains Rt.
    BRBMI_LOAD_RETURN_REG_FROM_CODE_wide BAKER_MARK_INTROSPECTION_FIELD_LDR_WIDE_OFFSET
.Lmark_introspection_extract_register_and_return_wide:
    lsr     r4, r4, #12                  // Extract `ref_reg` (Rt is in bits 12-15
                                         // of the second halfword of encoding T3).
    b       .Lmark_introspection_return_switch

    .balign 32
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_narrow, #function
    .hidden art_quick_read_barrier_mark_introspection_narrow
    .global art_quick_read_barrier_mark_introspection_narrow
art_quick_read_barrier_mark_introspection_narrow:
    // At this point, IP contains the reference, R4 can be freely used.
    // (R4 is reserved for the entrypoint address.)
    // For heap poisoning, the reference is poisoned, so unpoison it first.
    UNPOISON_HEAP_REF ip
    // Check for null or marked, lock word is loaded into R4.
    BRBMI_CHECK_NULL_AND_MARKED _narrow
    // Load the 16-bit instruction.
    BRBMI_LOAD_RETURN_REG_FROM_CODE_narrow BAKER_MARK_INTROSPECTION_FIELD_LDR_NARROW_OFFSET
.Lmark_introspection_extract_register_and_return_narrow:
    and     r4, r4, #7                   // Extract `ref_reg` (Rt is in bits 0-2
                                         // of encoding T1).
.Lmark_introspection_return_switch:
    tbb     [pc, r4]                     // Jump to the switch case; the byte
                                         // offset table immediately follows.
.Lmark_introspection_return_table:
    // Byte offsets (in 2-byte units, as consumed by TBB) to the return switch
    // cases below, indexed by `ref_reg`; reserved registers get the "bad"
    // (BKPT) case offset.
    BRBMI_FOR_REGISTERS BRBMI_RETURN_SWITCH_CASE_OFFSET, BRBMI_BAD_RETURN_SWITCH_CASE_OFFSET
    .balign 16
    BRBMI_FOR_12_REGISTERS BRBMI_RETURN_SWITCH_CASE, BRBMI_BAD_RETURN_SWITCH_CASE

    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_WIDE_OFFSET, _wide
    BRBMI_GC_ROOT_AND_FIELD_SLOW_PATH BAKER_MARK_INTROSPECTION_GC_ROOT_LDR_NARROW_OFFSET, _narrow

    .balign 256
    .thumb_func
    .type art_quick_read_barrier_mark_introspection_arrays, #function
    .hidden art_quick_read_barrier_mark_introspection_arrays
    .global art_quick_read_barrier_mark_introspection_arrays
art_quick_read_barrier_mark_introspection_arrays:
    // 16 array-load switch cases (one per possible index register), padded
    // with BKPT filler for unusable registers; see layout comment above.
    BRBMI_FOR_REGISTERS BRBMI_ARRAY_LOAD, BRBMI_BKPT_FILL_8B
END art_quick_read_barrier_mark_introspection

    /*
     * Invoke-polymorphic stub. Saves the refs-and-args frame, reserves an
     * 8-byte zero-initialized JValue on the stack for the result and calls
     * artInvokePolymorphic(JValue* result, receiver, Thread*, SP); the
     * receiver is presumably passed through in r1 from managed code (r1 is
     * not written before the call) - confirm against the runtime entrypoint.
     * The call returns the shorty descriptor character of the method
     * handle's return type, which selects a handler below that copies the
     * JValue into the managed return registers.
     */
    .extern artInvokePolymorphic
ENTRY art_quick_invoke_polymorphic
    SETUP_SAVE_REFS_AND_ARGS_FRAME r2
    mov     r2, r9                 @ pass Thread::Current
    mov     r3, sp                 @ pass SP
    mov     r0, #0                 @ initialize 64-bit JValue as zero.
    str     r0, [sp, #-4]!
    .cfi_adjust_cfa_offset 4
    str     r0, [sp, #-4]!
    .cfi_adjust_cfa_offset 4
    mov     r0, sp                 @ pass JValue for return result as first argument.
    bl      artInvokePolymorphic   @ artInvokePolymorphic(JValue, receiver, Thread*, SP)
    sub     r0, 'A'                @ return value is descriptor of handle's return type.
    cmp     r0, 'Z' - 'A'          @ check if value is in bounds of handler table
    bgt     .Lcleanup_and_return   @ and clean-up if not.
    adr     r1, .Lhandler_table
    tbb     [r1, r0]               @ branch to handler for return value based on return
                                   @ type (TBB convention: base = table, index = case,
                                   @ as in the mark-introspection switch above).

.Lstart_of_handlers:
.Lstore_boolean_result:
    ldrb    r0, [sp]               @ Copy boolean value to return value of this function.
    b       .Lcleanup_and_return
.Lstore_char_result:
    ldrh    r0, [sp]               @ Copy char value to return value of this function.
    b       .Lcleanup_and_return
.Lstore_float_result:
    vldr    s0, [sp]               @ Copy float value from JValue result to the context restored by
    vstr    s0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    b       .Lcleanup_and_return
.Lstore_double_result:
    vldr    d0, [sp]               @ Copy double value from JValue result to the context restored by
    vstr    d0, [sp, #16]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    b       .Lcleanup_and_return
.Lstore_long_result:
    ldr     r1, [sp, #4]           @ Copy the upper bits from JValue result to the context restored by
    str     r1, [sp, #80]          @ RESTORE_SAVE_REFS_AND_ARGS_FRAME.
    // Fall-through for lower bits.
.Lstore_int_result:
    ldr     r0, [sp]               @ Copy int value to return value of this function.
    // Fall-through to clean up and return.
.Lcleanup_and_return:
    add     sp, #8                 @ Pop the 8-byte JValue result slot.
    .cfi_adjust_cfa_offset -8
    RESTORE_SAVE_REFS_AND_ARGS_FRAME
    REFRESH_MARKING_REGISTER
    RETURN_OR_DELIVER_PENDING_EXCEPTION_REG r2

// One TBB table entry: the handler's offset from .Lstart_of_handlers in
// 2-byte units (TBB doubles the loaded byte).
.macro HANDLER_TABLE_OFFSET handler_label
    .byte (\handler_label - .Lstart_of_handlers) / 2
.endm

// One entry per return type descriptor character 'A'..'Z'; letters that are
// not valid shorty return types fall through to plain cleanup.
.Lhandler_table:
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // A
    HANDLER_TABLE_OFFSET(.Lstore_int_result)     // B (byte)
    HANDLER_TABLE_OFFSET(.Lstore_char_result)    // C (char)
    HANDLER_TABLE_OFFSET(.Lstore_double_result)  // D (double)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // E
    HANDLER_TABLE_OFFSET(.Lstore_float_result)   // F (float)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // G
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // H
    HANDLER_TABLE_OFFSET(.Lstore_int_result)     // I (int)
    HANDLER_TABLE_OFFSET(.Lstore_long_result)    // J (long)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // K
    HANDLER_TABLE_OFFSET(.Lstore_int_result)     // L (object)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // M
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // N
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // O
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // P
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // Q
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // R
    HANDLER_TABLE_OFFSET(.Lstore_int_result)     // S (short)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // T
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // U
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // V (void)
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // W
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // X
    HANDLER_TABLE_OFFSET(.Lcleanup_and_return)   // Y
    HANDLER_TABLE_OFFSET(.Lstore_boolean_result) // Z (boolean)
.purgem HANDLER_TABLE_OFFSET
END art_quick_invoke_polymorphic

// Wrap ExecuteSwitchImpl in assembly method which specifies DEX PC for unwinding.
// Argument 0: r0: The context pointer for ExecuteSwitchImpl.
// Argument 1: r1: Pointer to the templated ExecuteSwitchImpl to call.
// Argument 2: r2: The value of DEX PC (memory address of the methods bytecode).
ENTRY ExecuteSwitchImplAsm
    push    {r4, lr}               @ 2 words of callee saves.
    .cfi_adjust_cfa_offset 8
    .cfi_rel_offset r4, 0
    .cfi_rel_offset lr, 4
    mov     r4, r2                 @ r4 = DEX PC
    CFI_DEFINE_DEX_PC_WITH_OFFSET(0 /* r0 */, 4 /* r4 */, 0)
    blx     r1                     @ Call the wrapped method.
    pop     {r4, pc}               @ Restore R4 and return.
END ExecuteSwitchImplAsm