      1 /*
      2  * This file was generated automatically by gen-template.py for 'armv7-a-neon'.
      3  *
      4  * --> DO NOT EDIT <--
      5  */
      6 
      7 /* File: armv5te/header.S */
      8 /*
      9  * Copyright (C) 2008 The Android Open Source Project
     10  *
     11  * Licensed under the Apache License, Version 2.0 (the "License");
     12  * you may not use this file except in compliance with the License.
     13  * You may obtain a copy of the License at
     14  *
     15  *      http://www.apache.org/licenses/LICENSE-2.0
     16  *
     17  * Unless required by applicable law or agreed to in writing, software
     18  * distributed under the License is distributed on an "AS IS" BASIS,
     19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     20  * See the License for the specific language governing permissions and
     21  * limitations under the License.
     22  */
     23 
     24 #if defined(WITH_JIT)
     25 
     26 /*
     27  * ARMv5 definitions and declarations.
     28  */
     29 
     30 /*
     31 ARM EABI general notes:
     32 
     33 r0-r3 hold first 4 args to a method; they are not preserved across method calls
     34 r4-r8 are available for general use
     35 r9 is given special treatment in some situations, but not for us
     36 r10 (sl) seems to be generally available
     37 r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
     38 r12 (ip) is scratch -- not preserved across method calls
     39 r13 (sp) should be managed carefully in case a signal arrives
     40 r14 (lr) must be preserved
     41 r15 (pc) can be tinkered with directly
     42 
     43 r0 holds returns of <= 4 bytes
     44 r0-r1 hold returns of 8 bytes, low word in r0
     45 
     46 Callee must save/restore r4+ (except r12) if it modifies them.
     47 
     48 Stack is "full descending".  Only the arguments that don't fit in the first 4
     49 registers are placed on the stack.  "sp" points at the first stacked argument
     50 (i.e. the 5th arg).
     51 
     52 VFP: single-precision results in s0, double-precision results in d0.
     53 
     54 In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
     55 64-bit quantities (long long, double) must be 64-bit aligned.
     56 */
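/*
Worked example (illustrative sketch, not part of the generated template): how
the EABI rules above map a hypothetical C call onto registers and stack.  The
function and parameter names are invented for this example.

    long long f(int a, int b, int c, int d, int e, long long g);

    a..d   -> r0-r3
    e      -> [sp, #0]      (first stacked argument; sp points here on entry)
    g      -> [sp, #8]      (64-bit value, so 8-byte aligned; [sp, #4] is padding)
    return -> r0/r1         (low word in r0)
*/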
     57 
     58 /*
     59 JIT and ARM notes:
     60 
     61 The following registers have fixed assignments:
     62 
     63   reg nick      purpose
     64   r5  rFP       interpreted frame pointer, used for accessing locals and args
     65   r6  rSELF     thread pointer
     66 
     67 The following registers have fixed assignments in mterp but are scratch
     68 registers in compiled code
     69 
     70   reg nick      purpose
     71   r4  rPC       interpreted program counter, used for fetching instructions
     72   r7  rINST     first 16-bit code unit of current instruction
     73   r8  rIBASE    interpreted instruction base pointer, used for computed goto
     74 
     75 Macros are provided for common operations.  Each macro MUST emit only
     76 one instruction to make instruction-counting easier.  They MUST NOT alter
     77 unspecified registers or condition codes.
     78 */
     79 
     80 /* single-purpose registers, given names for clarity */
     81 #define rPC     r4
     82 #define rFP     r5
     83 #define rSELF   r6
     84 #define rINST   r7
     85 #define rIBASE  r8
     86 
     87 /*
     88  * Given a frame pointer, find the stack save area.
     89  *
     90  * In C this is "((StackSaveArea*)(_fp) -1)".
     91  */
     92 #define SAVEAREA_FROM_FP(_reg, _fpreg) \
     93     sub     _reg, _fpreg, #sizeofStackSaveArea
     94 
     95 #define EXPORT_PC() \
     96     str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)]
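/*
 * Rough C equivalents of the two macros above (illustrative sketch only; the
 * real offsets come from asm-constants.h, and the exact struct/field
 * spellings in the runtime may differ from the approximations below):
 *
 *   StackSaveArea* saveAreaFromFp(u4* fp) {
 *       return ((StackSaveArea*) fp) - 1;        // SAVEAREA_FROM_FP
 *   }
 *
 *   void exportPc(u4* fp, const u2* pc) {
 *       // stores pc at offStackSaveArea_currentPc within the save area
 *       saveAreaFromFp(fp)->currentPc = pc;      // EXPORT_PC
 *   }
 */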
     97 
     98 /*
     99  * This is a #include, not a %include, because we want the C pre-processor
    100  * to expand the macros into assembler assignment statements.
    101  */
    102 #include "../../../mterp/common/asm-constants.h"
    103 
    104 /* File: armv5te-vfp/platform.S */
    105 /*
    106  * ===========================================================================
    107  *  CPU-version-specific defines and utility
    108  * ===========================================================================
    109  */
    110 
    111 
    112     .global dvmCompilerTemplateStart
    113     .type   dvmCompilerTemplateStart, %function
    114     .text
    115 
    116 dvmCompilerTemplateStart:
    117 
    118 /* ------------------------------ */
    119     .balign 4
    120     .global dvmCompiler_TEMPLATE_CMP_LONG
    121 dvmCompiler_TEMPLATE_CMP_LONG:
    122 /* File: armv5te/TEMPLATE_CMP_LONG.S */
    123     /*
    124      * Compare two 64-bit values.  Puts 0, 1, or -1 into the destination
    125      * register based on the results of the comparison.
    126      *
    127      * We load the full values with LDM, but in practice many values could
    128      * be resolved by only looking at the high word.  This could be made
    129      * faster or slower by splitting the LDM into a pair of LDRs.
    130      *
    131      * If we just wanted to set condition flags, we could do this:
    132      *  subs    ip, r0, r2
    133      *  sbcs    ip, r1, r3
    134      *  subeqs  ip, r0, r2
    135      * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
    136      * integer value, which we can do with 2 conditional mov/mvn instructions
    137      * (set 1, set -1; if they're equal we already have 0 in ip), giving
    138      * us a constant 5-cycle path plus a branch at the end to the
    139      * instruction epilogue code.  The multi-compare approach below needs
    140      * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
    141      * in the worst case (the 64-bit values are equal).
    142      */
    143     /* cmp-long vAA, vBB, vCC */
    144     cmp     r1, r3                      @ compare (vBB+1, vCC+1)
    145     blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
    146     bgt     .LTEMPLATE_CMP_LONG_greater
    147     subs    r0, r0, r2                  @ r0<- r0 - r2
    148     bxeq    lr
    149     bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
    150 .LTEMPLATE_CMP_LONG_less:
    151     mvn     r0, #0                      @ r0<- -1
    152     bx      lr
    153 .LTEMPLATE_CMP_LONG_greater:
    154     mov     r0, #1                      @ r0<- 1
    155     bx      lr
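
    /*
     * For reference, a C sketch of the cmp-long semantics implemented above
     * (op1 in r1:r0, op2 in r3:r2; the function name is illustrative):
     *
     *   int cmpLong(int64_t op1, int64_t op2) {
     *       if (op1 < op2) return -1;
     *       if (op1 > op2) return 1;
     *       return 0;
     *   }
     */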
    156 
    157 /* ------------------------------ */
    158     .balign 4
    159     .global dvmCompiler_TEMPLATE_RETURN
    160 dvmCompiler_TEMPLATE_RETURN:
    161 /* File: armv5te/TEMPLATE_RETURN.S */
    162     /*
    163      * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
    164      * If the stored value in returnAddr is non-zero, the caller was
    165      * compiled by the JIT, so return to the address in the code cache
    166      * following the invoke instruction.  Otherwise return to the special
    167      * dvmJitToInterpNoChain entry point.
    168      */
    169 #if defined(TEMPLATE_INLINE_PROFILING)
    170     stmfd   sp!, {r0-r2,lr}             @ preserve live registers
    171     mov     r0, r6
    172     @ r0=rSELF
    173     mov     lr, pc
    174     ldr     pc, .LdvmFastMethodTraceExit
    175     ldmfd   sp!, {r0-r2,lr}             @ restore live registers
    176 #endif
    177     SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
    178     ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
    179     ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
    180     ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
    181 #if !defined(WITH_SELF_VERIFICATION)
    182     ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
    183 #else
    184     mov     r9, #0                      @ disable chaining
    185 #endif
    186     ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
    187                                         @ r2<- method we're returning to
    188     cmp     r2, #0                      @ break frame?
    189 #if !defined(WITH_SELF_VERIFICATION)
    190     beq     1f                          @ bail to interpreter
    191 #else
    192     blxeq   lr                          @ punt to interpreter and compare state
    193 #endif
    194     ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
    195     mov     rFP, r10                    @ publish new FP
    196     ldr     r10, [r2, #offMethod_clazz] @ r10<- method->clazz
    197 
    198     str     r2, [rSELF, #offThread_method]@ self->method = newSave->method
    199     ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
    200     str     rFP, [rSELF, #offThread_curFrame] @ curFrame = fp
    201     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
    202     str     r0, [rSELF, #offThread_methodClassDex]
    203     cmp     r8, #0                      @ check the break flags
    204     movne   r9, #0                      @ clear the chaining cell address
    205     str     r9, [rSELF, #offThread_inJitCodeCache] @ in code cache or not
    206     cmp     r9, #0                      @ chaining cell exists?
    207     blxne   r9                          @ jump to the chaining cell
    208 #if defined(WITH_JIT_TUNING)
    209     mov     r0, #kCallsiteInterpreted
    210 #endif
    211     mov     pc, r1                      @ callsite is interpreted
    212 1:
    213     mov     r0, #0
    214     str     r0, [rSELF, #offThread_inJitCodeCache] @ reset inJitCodeCache
    215     stmia   rSELF, {rPC, rFP}           @ SAVE_PC_FP_TO_SELF()
    216     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
    217     mov     r0, rSELF                   @ Expecting rSELF in r0
    218     blx     r2                          @ exit the interpreter
    219 
    220 /* ------------------------------ */
    221     .balign 4
    222     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
    223 dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
    224 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    225     /*
    226      * For polymorphic callsites - set up the Dalvik frame, load the Dalvik PC
    227      * into rPC, then jump to dvmJitToInterpNoChain to dispatch the
    228      * runtime-resolved callee.
    229      */
    230     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    231     ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    232     ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    233     ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
    234     ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
    235     add     r3, r1, #1  @ Thumb addr is odd
    236     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    237     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    238     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    239     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    240     cmp     r10, r9                     @ bottom < interpStackEnd?
    241     bxlo    lr                          @ return to raise stack overflow excep.
    242     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    243     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    244     ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
    245     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    246     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
    247     ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
    248 
    249 
    250     @ set up newSaveArea
    251     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    252     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    253     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    254     cmp     r8, #0                      @ breakFlags != 0
    255     bxne    lr                          @ bail to the interpreter
    256     tst     r10, #ACC_NATIVE
    257 #if !defined(WITH_SELF_VERIFICATION)
    258     bne     .LinvokeNative
    259 #else
    260     bxne    lr                          @ bail to the interpreter
    261 #endif
    262 
    263     ldr     r10, .LdvmJitToInterpTraceSelectNoChain
    264     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    265 
    266     @ Update "thread" values for the new method
    267     str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
    268     str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
    269     mov     rFP, r1                         @ fp = newFp
    270     str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
    271 #if defined(TEMPLATE_INLINE_PROFILING)
    272     stmfd   sp!, {r0-r3}                    @ preserve r0-r3
    273     mov     r1, r6
    274     @ r0=methodToCall, r1=rSELF
    275     mov     lr, pc
    276     ldr     pc, .LdvmFastMethodTraceEnter
    277     ldmfd   sp!, {r0-r3}                    @ restore r0-r3
    278 #endif
    279 
    280     @ Start executing the callee
    281 #if defined(WITH_JIT_TUNING)
    282     mov     r0, #kInlineCacheMiss
    283 #endif
    284     mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
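
    /*
     * The frame-setup arithmetic above, as an illustrative C sketch (not the
     * actual runtime code; the types and names are approximate):
     *
     *   StackSaveArea* oldSave = SAVEAREA_FROM_FP(fp);
     *   u4* newFp  = (u4*) oldSave - methodToCall->registersSize;
     *   u4* bottom = (u4*) SAVEAREA_FROM_FP(newFp) - methodToCall->outsSize;
     *   if (bottom < self->interpStackEnd)
     *       return;   // not enough space; the interpreter raises the overflow
     */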
    285 
    286 /* ------------------------------ */
    287     .balign 4
    288     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
    289 dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
    290 /* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    291     /*
    292      * For a monomorphic callsite, set up the Dalvik frame and return to the
    293      * Thumb code through the link register to transfer control to the callee
    294      * method through a dedicated chaining cell.
    295      */
    296     @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
    297     @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
    298     @ methodToCall is guaranteed to be non-native
    299 .LinvokeChain:
    300     ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
    301     ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
    302     add     r3, r1, #1  @ Thumb addr is odd
    303     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    304     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    305     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    306     add     r12, lr, #2                 @ setup the punt-to-interp address
    307     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    308     cmp     r10, r9                     @ bottom < interpStackEnd?
    309     bxlo    r12                         @ return to raise stack overflow excep.
    310     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    311     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    312     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    313     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
    314 
    315     @ set up newSaveArea
    316     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    317     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    318     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    319     cmp     r8, #0                      @ breakFlags != 0
    320     bxne    r12                         @ bail to the interpreter
    321 
    322     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    323 
    324     @ Update "thread" values for the new method
    325     str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
    326     str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
    327     mov     rFP, r1                         @ fp = newFp
    328     str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
    329 #if defined(TEMPLATE_INLINE_PROFILING)
    330     stmfd   sp!, {r0-r2,lr}             @ preserve clobbered live registers
    331     mov     r1, r6
    332     @ r0=methodToCall, r1=rSELF
    333     mov     lr, pc
    334     ldr     pc, .LdvmFastMethodTraceEnter
    335     ldmfd   sp!, {r0-r2,lr}             @ restore registers
    336 #endif
    337 
    338     bx      lr                              @ return to the callee-chaining cell
    339 
    340 /* ------------------------------ */
    341     .balign 4
    342     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
    343 dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
    344 /* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    345     /*
    346      * For a polymorphic callsite, check whether the cached class pointer
    347      * matches the current one.  If so, set up the Dalvik frame and return to the
    348      * Thumb code through the link register to transfer control to the callee
    349      * method through a dedicated chaining cell.
    350      *
    351      * The predicted chaining cell is declared in ArmLIR.h with the
    352      * following layout:
    353      *
    354      *  typedef struct PredictedChainingCell {
    355      *      u4 branch;
    356      *      const ClassObject *clazz;
    357      *      const Method *method;
    358      *      u4 counter;
    359      *  } PredictedChainingCell;
    360      *
    361      * Upon returning to the callsite:
    362      *    - lr  : to branch to the chaining cell
    363      *    - lr+2: to punt to the interpreter
    364      *    - lr+4: to fully resolve the callee and possibly rechain.
    365      *            r3 <- class
    366      *            r9 <- counter
    367      */
    368     @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
    369     ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
    370     ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
    371     ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
    372     ldr     r9, [rSELF, #offThread_icRechainCount] @ r9 <- shared rechainCount
    373     cmp     r3, r8          @ predicted class == actual class?
    374 #if defined(WITH_JIT_TUNING)
    375     ldr     r7, .LdvmICHitCount
    376 #if defined(WORKAROUND_CORTEX_A9_745320)
    377     /* Don't use conditional loads if the HW defect exists */
    378     bne     101f
    379     ldr     r10, [r7, #0]
    380 101:
    381 #else
    382     ldreq   r10, [r7, #0]
    383 #endif
    384     add     r10, r10, #1
    385     streq   r10, [r7, #0]
    386 #endif
    387     ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    388     ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    389     beq     .LinvokeChain   @ predicted chain is valid
    390     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
    391     cmp     r8, #0          @ initialized class or not
    392     moveq   r1, #0
    393     subne   r1, r9, #1      @ count--
    394     strne   r1, [rSELF, #offThread_icRechainCount]  @ write back to thread
    395     add     lr, lr, #4      @ return to fully-resolve landing pad
    396     /*
    397      * r1 <- count
    398      * r2 <- &predictedChainCell
    399      * r3 <- this->class
    400      * r4 <- dPC
    401      * r7 <- this->class->vtable
    402      */
    403     bx      lr
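
    /*
     * Decision logic above, summarized as an illustrative C-like sketch
     * (names are approximate; the real cell layout is PredictedChainingCell):
     *
     *   if (this->clazz == cell->clazz) {
     *       // prediction hit: branch to the chaining cell (return to lr)
     *   } else {
     *       if (cell->clazz != NULL)        // cell already initialized
     *           self->icRechainCount--;     // rate-limit future rechaining
     *       // miss: return to lr+4 so the callee is fully resolved
     *       // (and possibly rechained) by the landing pad
     *   }
     */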
    404 
    405 /* ------------------------------ */
    406     .balign 4
    407     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
    408 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
    409 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    410     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    411     @ r7 = methodToCall->registersSize
    412     ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
    413     ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
    414     add     r3, r1, #1  @ Thumb addr is odd
    415     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    416     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    417     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
    418     cmp     r10, r9                     @ bottom < interpStackEnd?
    419     bxlo    lr                          @ return to raise stack overflow excep.
    420     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    421     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    422     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
    423 
    424     @ set up newSaveArea
    425     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
    426     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
    427     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
    428     cmp     r8, #0                      @ breakFlags != 0
    429     ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
    430 #if !defined(WITH_SELF_VERIFICATION)
    431     bxne    lr                          @ bail to the interpreter
    432 #else
    433     bx      lr                          @ bail to interpreter unconditionally
    434 #endif
    435 
    436     @ go ahead and transfer control to the native code
    437     ldr     r9, [rSELF, #offThread_jniLocal_topCookie]@r9<-thread->localRef->...
    438     mov     r2, #0
    439     str     r1, [rSELF, #offThread_curFrame]   @ curFrame = newFp
    440     str     r2, [rSELF, #offThread_inJitCodeCache] @ not in the jit code cache
    441     str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
    442                                         @ newFp->localRefCookie=top
    443     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
    444 
    445     mov     r2, r0                        @ arg2<- methodToCall
    446     mov     r0, r1                        @ arg0<- newFP
    447     add     r1, rSELF, #offThread_retval  @ arg1<- &retval
    448     mov     r3, rSELF                     @ arg3<- self
    449 #if defined(TEMPLATE_INLINE_PROFILING)
    450     @ r2=methodToCall, r6=rSELF
    451     stmfd   sp!, {r2,r6}                @ to be consumed after JNI return
    452     stmfd   sp!, {r0-r3}                @ preserve r0-r3
    453     mov     r0, r2
    454     mov     r1, r6
    455     @ r0=JNIMethod, r1=rSELF
    456     mov     lr, pc
    457     ldr     pc, .LdvmFastMethodTraceEnter
    458     ldmfd   sp!, {r0-r3}                @ restore r0-r3
    459 #endif
    460 
    461     blx     r8                          @ off to the native code
    462 
    463 #if defined(TEMPLATE_INLINE_PROFILING)
    464     ldmfd   sp!, {r0-r1}                @ restore r2 and r6
    465     @ r0=JNIMethod, r1=rSELF
    466     mov     lr, pc
    467     ldr     pc, .LdvmFastNativeMethodTraceExit
    468 #endif
    469     @ native return; r10=newSaveArea
    470     @ equivalent to dvmPopJniLocals
    471     ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
    472     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
    473     ldr     r1, [rSELF, #offThread_exception] @ check for exception
    474     str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = fp
    475     cmp     r1, #0                      @ null?
    476     str     r0, [rSELF, #offThread_jniLocal_topCookie] @ new top <- old top
    477     ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
    478 
    479     @ r0 = dalvikCallsitePC
    480     bne     .LhandleException           @ no, handle exception
    481 
    482     str     r2, [rSELF, #offThread_inJitCodeCache] @ set the mode properly
    483     cmp     r2, #0                      @ return chaining cell still exists?
    484     bxne    r2                          @ yes - go ahead
    485 
    486     @ continue executing the next instruction through the interpreter
    487     ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
    488     add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
    489 #if defined(WITH_JIT_TUNING)
    490     mov     r0, #kCallsiteInterpreted
    491 #endif
    492     mov     pc, r1
    493 
    494 /* ------------------------------ */
    495     .balign 4
    496     .global dvmCompiler_TEMPLATE_MUL_LONG
    497 dvmCompiler_TEMPLATE_MUL_LONG:
    498 /* File: armv5te/TEMPLATE_MUL_LONG.S */
    499     /*
    500      * Signed 64-bit integer multiply.
    501      *
    502      * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
    503      *
    504      * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
    505      *        WX
    506      *      x YZ
    507      *  --------
    508      *     ZW ZX
    509      *  YW YX
    510      *
    511      * The low word of the result holds ZX, the high word holds
    512      * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
    513      * it doesn't fit in the low 64 bits.
    514      *
    515      * Unlike most ARM math operations, multiply instructions have
    516      * restrictions on using the same register more than once (Rd and Rm
    517      * cannot be the same).
    518      */
    519     /* mul-long vAA, vBB, vCC */
    520     mul     ip, r2, r1                  @  ip<- ZxW
    521     umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
    522     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
    523     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
    524     mov     r0,r9
    525     mov     r1,r10
    526     bx      lr
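
    /*
     * Worked C equivalent of the decomposition above (illustrative; the low
     * 64 bits of the product are the same for signed and unsigned operands):
     *
     *   uint64_t mulLong(uint64_t wx, uint64_t yz) {
     *       uint32_t x = (uint32_t) wx, w = (uint32_t)(wx >> 32);
     *       uint32_t z = (uint32_t) yz, y = (uint32_t)(yz >> 32);
     *       uint64_t zx = (uint64_t) z * x;                      // umull
     *       uint32_t hi = z * w + y * x + (uint32_t)(zx >> 32);  // mul, mla, add
     *       return ((uint64_t) hi << 32) | (uint32_t) zx;
     *   }
     */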
    527 
    528 /* ------------------------------ */
    529     .balign 4
    530     .global dvmCompiler_TEMPLATE_SHL_LONG
    531 dvmCompiler_TEMPLATE_SHL_LONG:
    532 /* File: armv5te/TEMPLATE_SHL_LONG.S */
    533     /*
    534      * Long integer shift.  This is different from the generic 32/64-bit
    535      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    536      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    537      * 6 bits.
    538      */
    539     /* shl-long vAA, vBB, vCC */
    540     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    541     mov     r1, r1, asl r2              @  r1<- r1 << r2
    542     rsb     r3, r2, #32                 @  r3<- 32 - r2
    543     orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >>> (32-r2))
    544     subs    ip, r2, #32                 @  ip<- r2 - 32
    545     movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
    546     mov     r0, r0, asl r2              @  r0<- r0 << r2
    547     bx      lr
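
    /*
     * Net effect in C (illustrative sketch): only the low 6 bits of the
     * shift distance are used, as Dalvik requires.
     *
     *   int64_t shlLong(int64_t value, int32_t shift) {
     *       return (int64_t)((uint64_t) value << (shift & 63));
     *   }
     */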
    548 
    549 /* ------------------------------ */
    550     .balign 4
    551     .global dvmCompiler_TEMPLATE_SHR_LONG
    552 dvmCompiler_TEMPLATE_SHR_LONG:
    553 /* File: armv5te/TEMPLATE_SHR_LONG.S */
    554     /*
    555      * Long integer shift.  This is different from the generic 32/64-bit
    556      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    557      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    558      * 6 bits.
    559      */
    560     /* shr-long vAA, vBB, vCC */
    561     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    562     mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
    563     rsb     r3, r2, #32                 @  r3<- 32 - r2
    564     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    565     subs    ip, r2, #32                 @  ip<- r2 - 32
    566     movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
    567     mov     r1, r1, asr r2              @  r1<- r1 >> r2
    568     bx      lr
    569 
    570 /* ------------------------------ */
    571     .balign 4
    572     .global dvmCompiler_TEMPLATE_USHR_LONG
    573 dvmCompiler_TEMPLATE_USHR_LONG:
    574 /* File: armv5te/TEMPLATE_USHR_LONG.S */
    575     /*
    576      * Long integer shift.  This is different from the generic 32/64-bit
    577      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    578      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    579      * 6 bits.
    580      */
    581     /* ushr-long vAA, vBB, vCC */
    582     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    583     mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
    584     rsb     r3, r2, #32                 @  r3<- 32 - r2
    585     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    586     subs    ip, r2, #32                 @  ip<- r2 - 32
    587     movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
    588     mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
    589     bx      lr
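
    /*
     * C equivalents of the two right-shift templates above (illustrative
     * sketch): shr-long is an arithmetic shift of the signed value, ushr-long
     * a logical shift of the raw bits; both mask the distance to 6 bits.
     * Right-shifting a negative value is taken to be arithmetic here, as it
     * is on ARM.
     *
     *   int64_t shrLong(int64_t value, int32_t shift) {
     *       return value >> (shift & 63);        // arithmetic shift
     *   }
     *   uint64_t ushrLong(uint64_t value, int32_t shift) {
     *       return value >> (shift & 63);        // logical shift
     *   }
     */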
    590 
    591 /* ------------------------------ */
    592     .balign 4
    593     .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
    594 dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
    595 /* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
    596 /* File: armv5te-vfp/fbinop.S */
    597     /*
    598      * Generic 32-bit floating point operation.  Provide an "instr" line that
    599      * specifies an instruction that performs s2 = s0 op s1.
    600      *
    601      * On entry:
    602      *     r0 = target dalvik register address
    603      *     r1 = op1 address
    604      *     r2 = op2 address
    605      */
    606      flds    s0,[r1]
    607      flds    s1,[r2]
    608      fadds   s2, s0, s1
    609      fsts    s2,[r0]
    610      bx      lr
    611 
    612 
    613 /* ------------------------------ */
    614     .balign 4
    615     .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
    616 dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
    617 /* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
    618 /* File: armv5te-vfp/fbinop.S */
    619     /*
    620      * Generic 32-bit floating point operation.  Provide an "instr" line that
    621      * specifies an instruction that performs s2 = s0 op s1.
    622      *
    623      * On entry:
    624      *     r0 = target dalvik register address
    625      *     r1 = op1 address
    626      *     r2 = op2 address
    627      */
    628      flds    s0,[r1]
    629      flds    s1,[r2]
    630      fsubs   s2, s0, s1
    631      fsts    s2,[r0]
    632      bx      lr
    633 
    634 
    635 /* ------------------------------ */
    636     .balign 4
    637     .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
    638 dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
    639 /* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
    640 /* File: armv5te-vfp/fbinop.S */
    641     /*
    642      * Generic 32-bit floating point operation.  Provide an "instr" line that
    643      * specifies an instruction that performs s2 = s0 op s1.
    644      *
    645      * On entry:
    646      *     r0 = target dalvik register address
    647      *     r1 = op1 address
    648      *     r2 = op2 address
    649      */
    650      flds    s0,[r1]
    651      flds    s1,[r2]
    652      fmuls   s2, s0, s1
    653      fsts    s2,[r0]
    654      bx      lr
    655 
    656 
    657 /* ------------------------------ */
    658     .balign 4
    659     .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
    660 dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
    661 /* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
    662 /* File: armv5te-vfp/fbinop.S */
    663     /*
    664      * Generic 32-bit floating point operation.  Provide an "instr" line that
    665      * specifies an instruction that performs s2 = s0 op s1.
    666      *
    667      * On entry:
    668      *     r0 = target dalvik register address
    669      *     r1 = op1 address
    670      *     r2 = op2 address
    671      */
    672      flds    s0,[r1]
    673      flds    s1,[r2]
    674      fdivs   s2, s0, s1
    675      fsts    s2,[r0]
    676      bx      lr
    677 
    678 
    679 /* ------------------------------ */
    680     .balign 4
    681     .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
    682 dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
    683 /* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
    684 /* File: armv5te-vfp/fbinopWide.S */
    685     /*
    686      * Generic 64-bit floating point operation.  Provide an "instr" line that
    687      * specifies an instruction that performs s2 = s0 op s1.
    688      *
    689      * On entry:
    690      *     r0 = target dalvik register address
    691      *     r1 = op1 address
    692      *     r2 = op2 address
    693      */
    694      fldd    d0,[r1]
    695      fldd    d1,[r2]
    696      faddd   d2, d0, d1
    697      fstd    d2,[r0]
    698      bx      lr
    699 
    700 
    701 /* ------------------------------ */
    702     .balign 4
    703     .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
    704 dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
    705 /* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
    706 /* File: armv5te-vfp/fbinopWide.S */
    707     /*
    708      * Generic 64-bit floating point operation.  Provide an "instr" line that
    709      * specifies an instruction that performs s2 = s0 op s1.
    710      *
    711      * On entry:
    712      *     r0 = target dalvik register address
    713      *     r1 = op1 address
    714      *     r2 = op2 address
    715      */
    716      fldd    d0,[r1]
    717      fldd    d1,[r2]
    718      fsubd   d2, d0, d1
    719      fstd    d2,[r0]
    720      bx      lr
    721 
    722 
    723 /* ------------------------------ */
    724     .balign 4
    725     .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
    726 dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
    727 /* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
    728 /* File: armv5te-vfp/fbinopWide.S */
    729     /*
    730      * Generic 64-bit floating point operation.  Provide an "instr" line that
    731      * specifies an instruction that performs s2 = s0 op s1.
    732      *
    733      * On entry:
    734      *     r0 = target dalvik register address
    735      *     r1 = op1 address
    736      *     r2 = op2 address
    737      */
    738      fldd    d0,[r1]
    739      fldd    d1,[r2]
    740      fmuld   d2, d0, d1
    741      fstd    d2,[r0]
    742      bx      lr
    743 
    744 
    745 /* ------------------------------ */
    746     .balign 4
    747     .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
    748 dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
    749 /* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
    750 /* File: armv5te-vfp/fbinopWide.S */
    751     /*
    752      * Generic 64-bit floating point operation.  Provide an "instr" line that
    753      * specifies an instruction that performs s2 = s0 op s1.
    754      *
    755      * On entry:
    756      *     r0 = target dalvik register address
    757      *     r1 = op1 address
    758      *     r2 = op2 address
    759      */
    760      fldd    d0,[r1]
    761      fldd    d1,[r2]
    762      fdivd   d2, d0, d1
    763      fstd    d2,[r0]
    764      bx      lr
    765 
    766 
    767 /* ------------------------------ */
    768     .balign 4
    769     .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
    770 dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
    771 /* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
    772 /* File: armv5te-vfp/funopNarrower.S */
    773     /*
    774      * Generic 64bit-to-32bit floating point unary operation.  Provide an
    775      * "instr" line that specifies an instruction that performs "s0 = op d0".
    776      *
    777      * For: double-to-int, double-to-float
    778      *
    779      * On entry:
    780      *     r0 = target dalvik register address
    781      *     r1 = src dalvik register address
    782      */
    783     /* unop vA, vB */
    784     fldd    d0, [r1]                    @ d0<- vB
    785     fcvtsd  s0, d0                              @ s0<- op d0
    786     fsts    s0, [r0]                    @ vA<- s0
    787     bx      lr
    788 
    789 
    790 /* ------------------------------ */
    791     .balign 4
    792     .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
    793 dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
    794 /* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
    795 /* File: armv5te-vfp/funopNarrower.S */
    796     /*
    797      * Generic 64bit-to-32bit floating point unary operation.  Provide an
    798      * "instr" line that specifies an instruction that performs "s0 = op d0".
    799      *
    800      * For: double-to-int, double-to-float
    801      *
    802      * On entry:
    803      *     r0 = target dalvik register address
    804      *     r1 = src dalvik register address
    805      */
    806     /* unop vA, vB */
    807     fldd    d0, [r1]                    @ d0<- vB
    808     ftosizd  s0, d0                              @ s0<- op d0
    809     fsts    s0, [r0]                    @ vA<- s0
    810     bx      lr
    811 
    812 
    813 /* ------------------------------ */
    814     .balign 4
    815     .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
    816 dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
    817 /* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
    818 /* File: armv5te-vfp/funopWider.S */
    819     /*
    820      * Generic 32bit-to-64bit floating point unary operation.  Provide an
    821      * "instr" line that specifies an instruction that performs "d0 = op s0".
    822      *
    823      * For: int-to-double, float-to-double
    824      *
    825      * On entry:
    826      *     r0 = target dalvik register address
    827      *     r1 = src dalvik register address
    828      */
    829     /* unop vA, vB */
    830     flds    s0, [r1]                    @ s0<- vB
    831     fcvtds  d0, s0                              @ d0<- op s0
    832     fstd    d0, [r0]                    @ vA<- d0
    833     bx      lr
    834 
    835 
    836 /* ------------------------------ */
    837     .balign 4
    838     .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
    839 dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
    840 /* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
    841 /* File: armv5te-vfp/funop.S */
    842     /*
    843      * Generic 32bit-to-32bit floating point unary operation.  Provide an
    844      * "instr" line that specifies an instruction that performs "s1 = op s0".
    845      *
    846      * For: float-to-int, int-to-float
    847      *
    848      * On entry:
    849      *     r0 = target dalvik register address
    850      *     r1 = src dalvik register address
    851      */
    852     /* unop vA, vB */
    853     flds    s0, [r1]                    @ s0<- vB
    854     ftosizs s1, s0                              @ s1<- op s0
    855     fsts    s1, [r0]                    @ vA<- s1
    856     bx      lr
    857 
    858 
    859 /* ------------------------------ */
    860     .balign 4
    861     .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
    862 dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
    863 /* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
    864 /* File: armv5te-vfp/funopWider.S */
    865     /*
    866      * Generic 32bit-to-64bit floating point unary operation.  Provide an
    867      * "instr" line that specifies an instruction that performs "d0 = op s0".
    868      *
    869      * For: int-to-double, float-to-double
    870      *
    871      * On entry:
    872      *     r0 = target dalvik register address
    873      *     r1 = src dalvik register address
    874      */
    875     /* unop vA, vB */
    876     flds    s0, [r1]                    @ s0<- vB
    877     fsitod  d0, s0                              @ d0<- op s0
    878     fstd    d0, [r0]                    @ vA<- d0
    879     bx      lr
    880 
    881 
    882 /* ------------------------------ */
    883     .balign 4
    884     .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
    885 dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
    886 /* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
    887 /* File: armv5te-vfp/funop.S */
    888     /*
    889      * Generic 32bit-to-32bit floating point unary operation.  Provide an
    890      * "instr" line that specifies an instruction that performs "s1 = op s0".
    891      *
    892      * For: float-to-int, int-to-float
    893      *
    894      * On entry:
    895      *     r0 = target dalvik register address
    896      *     r1 = src dalvik register address
    897      */
    898     /* unop vA, vB */
    899     flds    s0, [r1]                    @ s0<- vB
    900     fsitos  s1, s0                              @ s1<- op s0
    901     fsts    s1, [r0]                    @ vA<- s1
    902     bx      lr
    903 
    904 
    905 /* ------------------------------ */
    906     .balign 4
    907     .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
    908 dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
    909 /* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
    910     /*
    911      * Compare two floating-point values.  Puts 0, 1, or -1 into the
    912      * destination register based on the results of the comparison.
    913      *
    914      * int compare(x, y) {
    915      *     if (x == y) {
    916      *         return 0;
    917      *     } else if (x < y) {
    918      *         return -1;
    919      *     } else if (x > y) {
    920      *         return 1;
    921      *     } else {
    922      *         return 1;
    923      *     }
    924      * }
    925      *
    926      * On entry:
    927      *    r0 = &op1 [vBB]
    928      *    r1 = &op2 [vCC]
    929      */
    930     /* op vAA, vBB, vCC */
    931     fldd    d0, [r0]                    @ d0<- vBB
    932     fldd    d1, [r1]                    @ d1<- vCC
    933     fcmpd   d0, d1                      @ compare (vBB, vCC)
    934     mov     r0, #1                      @ r0<- 1 (default)
    935     fmstat                              @ export status flags
    936     mvnmi   r0, #0                      @ (less than) r0<- -1
    937     moveq   r0, #0                      @ (equal) r0<- 0
    938     bx      lr
    939 
    940 /* ------------------------------ */
    941     .balign 4
    942     .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
    943 dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
    944 /* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
    945     /*
    946      * Compare two floating-point values.  Puts 0, 1, or -1 into the
    947      * destination register based on the results of the comparison.
    948      *
    949      * int compare(x, y) {
    950      *     if (x == y) {
    951      *         return 0;
    952      *     } else if (x > y) {
    953      *         return 1;
    954      *     } else if (x < y) {
    955      *         return -1;
    956      *     } else {
    957      *         return -1;
    958      *     }
    959      * }
    960      * On entry:
    961      *    r0 = &op1 [vBB]
    962      *    r1 = &op2 [vCC]
    963      */
    964     /* op vAA, vBB, vCC */
    965     fldd    d0, [r0]                    @ d0<- vBB
    966     fldd    d1, [r1]                    @ d1<- vCC
    967     fcmped  d0, d1                      @ compare (vBB, vCC)
    968     mvn     r0, #0                      @ r0<- -1 (default)
    969     fmstat                              @ export status flags
    970     movgt   r0, #1                      @ (greater than) r0<- 1
    971     moveq   r0, #0                      @ (equal) r0<- 0
    972     bx      lr
    973 
    974 /* ------------------------------ */
    975     .balign 4
    976     .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
    977 dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
    978 /* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
    979     /*
    980      * Compare two floating-point values.  Puts 0, 1, or -1 into the
    981      * destination register based on the results of the comparison.
    982      *
    983      * int compare(x, y) {
    984      *     if (x == y) {
    985      *         return 0;
    986      *     } else if (x < y) {
    987      *         return -1;
    988      *     } else if (x > y) {
    989      *         return 1;
    990      *     } else {
    991      *         return 1;
    992      *     }
    993      * }
    994      * On entry:
    995      *    r0 = &op1 [vBB]
    996      *    r1 = &op2 [vCC]
    997      */
    998     /* op vAA, vBB, vCC */
    999     flds    s0, [r0]                    @ s0<- vBB
   1000     flds    s1, [r1]                    @ s1<- vCC
   1001     fcmps   s0, s1                      @ compare (vBB, vCC)
   1002     mov     r0, #1                      @ r0<- 1 (default)
   1003     fmstat                              @ export status flags
   1004     mvnmi   r0, #0                      @ (less than) r0<- -1
   1005     moveq   r0, #0                      @ (equal) r0<- 0
   1006     bx      lr
   1007 
   1008 /* ------------------------------ */
   1009     .balign 4
   1010     .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
   1011 dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
   1012 /* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
   1013     /*
   1014      * Compare two floating-point values.  Puts 0, 1, or -1 into the
   1015      * destination register based on the results of the comparison.
   1016      *
   1017      * int compare(x, y) {
   1018      *     if (x == y) {
   1019      *         return 0;
   1020      *     } else if (x > y) {
   1021      *         return 1;
   1022      *     } else if (x < y) {
   1023      *         return -1;
   1024      *     } else {
   1025      *         return -1;
   1026      *     }
   1027      * }
   1028      * On entry:
   1029      *    r0 = &op1 [vBB]
   1030      *    r1 = &op2 [vCC]
   1031      */
   1032     /* op vAA, vBB, vCC */
   1033     flds    s0, [r0]                    @ s0<- vBB
   1034     flds    s1, [r1]                    @ s1<- vCC
   1035     fcmps   s0, s1                      @ compare (vBB, vCC)
   1036     mvn     r0, #0                      @ r0<- -1 (default)
   1037     fmstat                              @ export status flags
   1038     movgt   r0, #1                      @ (greater than) r0<- 1
   1039     moveq   r0, #0                      @ (equal) r0<- 0
   1040     bx      lr
   1041 
   1042 /* ------------------------------ */
   1043     .balign 4
   1044     .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
   1045 dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
   1046 /* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
   1047     /*
   1048      * 64-bit floating point vfp sqrt operation.
   1049      * If the result is a NaN, bail out to library code to do
   1050      * the right thing.
   1051      *
   1052      * On entry:
   1053      *     r2 src addr of op1
   1054      * On exit:
   1055      *     r0,r1 = res
   1056      */
   1057     fldd    d0, [r2]
   1058     fsqrtd  d1, d0
   1059     fcmpd   d1, d1
   1060     fmstat
   1061     fmrrd   r0, r1, d1
   1062     bxeq    lr   @ Result OK - return
   1063     ldr     r2, .Lsqrt
   1064     fmrrd   r0, r1, d0   @ reload orig operand
   1065     bx      r2   @ tail call to sqrt library routine
   1066 
   1067 .Lsqrt:
   1068     .word   sqrt
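
    /*
     * Equivalent logic in C (illustrative sketch; the helper name is
     * hypothetical and sqrt() requires <math.h>): use the VFP result unless
     * it is a NaN, in which case fall back to the C library.
     *
     *   double sqrtDouble(double x) {           // hypothetical helper name
     *       double r = __builtin_sqrt(x);       // the fsqrtd result
     *       return (r == r) ? r : sqrt(x);      // a NaN never equals itself
     *   }
     */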
   1069 
   1070 /* ------------------------------ */
   1071     .balign 4
   1072     .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
   1073 dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
   1074 /* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
   1075     /*
   1076      * Throw an exception from JIT'ed code.
   1077      * On entry:
   1078      *    r0    Dalvik PC that raises the exception
   1079      */
   1080     b       .LhandleException
   1081 
   1082 /* ------------------------------ */
   1083     .balign 4
   1084     .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
   1085 dvmCompiler_TEMPLATE_MEM_OP_DECODE:
   1086 /* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
   1087 #if defined(WITH_SELF_VERIFICATION)
   1088     /*
   1089      * This handler encapsulates heap memory ops for selfVerification mode.
   1090      *
   1091      * The call to the handler is inserted prior to a heap memory operation.
   1092      * This handler then calls a function to decode the memory op, and process
   1093      * it accordingly. Afterwards, the handler changes the return address to
   1094      * skip the memory op so it never gets executed.
   1095      */
   1096     vpush   {d0-d15}                    @ save out all fp registers
   1097     push    {r0-r12,lr}                 @ save out all registers
   1098     ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
   1099     mov     r0, lr                      @ arg0 <- link register
   1100     mov     r1, sp                      @ arg1 <- stack pointer
   1101     blx     r2                          @ decode and handle the mem op
   1102     pop     {r0-r12,lr}                 @ restore all registers
   1103     vpop    {d0-d15}                    @ restore all fp registers
   1104     bx      lr                          @ return to compiled code
   1105 #endif
   1106 
   1107 /* ------------------------------ */
   1108     .balign 4
   1109     .global dvmCompiler_TEMPLATE_STRING_COMPARETO
   1110 dvmCompiler_TEMPLATE_STRING_COMPARETO:
   1111 /* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
   1112     /*
   1113      * String's compareTo.
   1114      *
   1115      * Requires r0/r1 to have been previously checked for null.  Will
   1116      * return a negative value if this string is < comp, 0 if they are the
   1117      * same, and a positive value if >.
   1118      *
   1119      * IMPORTANT NOTE:
   1120      *
   1121      * This code relies on hard-coded offsets for string objects, and must be
   1122      * kept in sync with definitions in UtfString.h.  See asm-constants.h
   1123      *
   1124      * On entry:
   1125      *    r0:   this object pointer
   1126      *    r1:   comp object pointer
   1127      *
   1128      */
   1129 
   1130     mov    r2, r0         @ this to r2, opening up r0 for return value
   1131     subs   r0, r2, r1     @ Same?
   1132     bxeq   lr
   1133 
   1134     ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]
   1135     ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]
   1136     ldr    r7, [r2, #STRING_FIELDOFF_COUNT]
   1137     ldr    r10, [r1, #STRING_FIELDOFF_COUNT]
   1138     ldr    r2, [r2, #STRING_FIELDOFF_VALUE]
   1139     ldr    r1, [r1, #STRING_FIELDOFF_VALUE]
   1140 
   1141     /*
   1142      * At this point, we have:
   1143      *    value:  r2/r1
   1144      *    offset: r4/r9
   1145      *    count:  r7/r10
   1146      * We're going to compute
   1147      *    r11 <- countDiff
   1148      *    r10 <- minCount
   1149      */
   1150      subs  r11, r7, r10
   1151      movls r10, r7
   1152 
   1153      /* Now, build pointers to the string data */
   1154      add   r2, r2, r4, lsl #1
   1155      add   r1, r1, r9, lsl #1
   1156      /*
   1157       * Note: data pointers point to previous element so we can use pre-index
   1158       * mode with base writeback.
   1159       */
   1160      add   r2, #16-2   @ offset to contents[-1]
   1161      add   r1, #16-2   @ offset to contents[-1]
   1162 
   1163      /*
   1164       * At this point we have:
   1165       *   r2: *this string data
   1166       *   r1: *comp string data
   1167       *   r10: iteration count for comparison
   1168       *   r11: value to return if the first part of the string is equal
   1169       *   r0: reserved for result
   1170       *   r3, r4, r7, r8, r9, r12 available for loading string data
   1171       */
   1172 
   1173     subs  r10, #2
   1174     blt   do_remainder2
   1175 
   1176       /*
   1177        * Unroll the first two checks so we can quickly catch early mismatch
   1178        * on long strings (but preserve incoming alignment)
   1179        */
   1180 
   1181     ldrh  r3, [r2, #2]!
   1182     ldrh  r4, [r1, #2]!
   1183     ldrh  r7, [r2, #2]!
   1184     ldrh  r8, [r1, #2]!
   1185     subs  r0, r3, r4
   1186     subeqs  r0, r7, r8
   1187     bxne  lr
   1188     cmp   r10, #28
   1189     bgt   do_memcmp16
   1190     subs  r10, #3
   1191     blt   do_remainder
   1192 
   1193 loopback_triple:
   1194     ldrh  r3, [r2, #2]!
   1195     ldrh  r4, [r1, #2]!
   1196     ldrh  r7, [r2, #2]!
   1197     ldrh  r8, [r1, #2]!
   1198     ldrh  r9, [r2, #2]!
   1199     ldrh  r12,[r1, #2]!
   1200     subs  r0, r3, r4
   1201     subeqs  r0, r7, r8
   1202     subeqs  r0, r9, r12
   1203     bxne  lr
   1204     subs  r10, #3
   1205     bge   loopback_triple
   1206 
   1207 do_remainder:
   1208     adds  r10, #3
   1209     beq   returnDiff
   1210 
   1211 loopback_single:
   1212     ldrh  r3, [r2, #2]!
   1213     ldrh  r4, [r1, #2]!
   1214     subs  r0, r3, r4
   1215     bxne  lr
   1216     subs  r10, #1
   1217     bne     loopback_single
   1218 
   1219 returnDiff:
   1220     mov   r0, r11
   1221     bx    lr
   1222 
   1223 do_remainder2:
   1224     adds  r10, #2
   1225     bne   loopback_single
   1226     mov   r0, r11
   1227     bx    lr
   1228 
   1229     /* Long string case */
   1230 do_memcmp16:
   1231     mov   r4, lr
   1232     ldr   lr, .Lmemcmp16
   1233     mov   r7, r11
   1234     add   r0, r2, #2
   1235     add   r1, r1, #2
   1236     mov   r2, r10
   1237     blx   lr
   1238     cmp   r0, #0
   1239     bxne  r4
   1240     mov   r0, r7
   1241     bx    r4
   1242 
   1243 .Lmemcmp16:
   1244     .word __memcmp16
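
    /*
     * Reference C sketch of the comparison strategy above (field names follow
     * the STRING_FIELDOFF_* constants; the helper name and the chars()
     * accessor are illustrative, not real runtime API):
     *
     *   int compareTo(const StringObject* s1, const StringObject* s2) {
     *       int countDiff = s1->count - s2->count;
     *       int minCount  = (s1->count <= s2->count) ? s1->count : s2->count;
     *       const u2* p1 = chars(s1->value) + s1->offset;
     *       const u2* p2 = chars(s2->value) + s2->offset;
     *       for (int i = 0; i < minCount; i++) {
     *           int diff = p1[i] - p2[i];
     *           if (diff != 0)
     *               return diff;               // first differing character
     *       }
     *       return countDiff;                  // equal prefix: shorter sorts first
     *   }
     */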
   1245 
   1246 /* ------------------------------ */
   1247     .balign 4
   1248     .global dvmCompiler_TEMPLATE_STRING_INDEXOF
   1249 dvmCompiler_TEMPLATE_STRING_INDEXOF:
   1250 /* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
   1251     /*
   1252      * String's indexOf.
   1253      *
   1254      * Requires r0 to have been previously checked for null.  Will
   1255      * return index of match of r1 in r0.
   1256      *
   1257      * IMPORTANT NOTE:
   1258      *
   1259      * This code relies on hard-coded offsets for string objects, and must be
   1260      * kept in sync with definitions in UtfString.h.  See asm-constants.h
   1261      *
   1262      * On entry:
   1263      *    r0:   string object pointer
   1264      *    r1:   char to match
   1265      *    r2:   Starting offset in string data
   1266      */
   1267 
   1268     ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]
   1269     ldr    r8, [r0, #STRING_FIELDOFF_COUNT]
   1270     ldr    r0, [r0, #STRING_FIELDOFF_VALUE]
   1271 
   1272     /*
   1273      * At this point, we have:
   1274      *    r0: object pointer
   1275      *    r1: char to match
   1276      *    r2: starting offset
   1277      *    r7: offset
   1278      *    r8: string length
   1279      */
   1280 
   1281      /* Build pointer to start of string data */
   1282      add   r0, #16
   1283      add   r0, r0, r7, lsl #1
   1284 
   1285      /* Save a copy of starting data in r7 */
   1286      mov   r7, r0
   1287 
   1288      /* Clamp start to [0..count] */
   1289      cmp   r2, #0
   1290      movlt r2, #0
   1291      cmp   r2, r8
   1292      movgt r2, r8
   1293 
   1294      /* Build pointer to start of data to compare and pre-bias */
   1295      add   r0, r0, r2, lsl #1
   1296      sub   r0, #2
   1297 
   1298      /* Compute iteration count */
   1299      sub   r8, r2
   1300 
   1301      /*
   1302       * At this point we have:
   1303       *   r0: start of data to test
   1304      *   r1: char to compare
   1305       *   r8: iteration count
   1306       *   r7: original start of string
   1307       *   r3, r4, r9, r10, r11, r12 available for loading string data
   1308       */
   1309 
   1310     subs  r8, #4
   1311     blt   indexof_remainder
   1312 
   1313 indexof_loop4:
   1314     ldrh  r3, [r0, #2]!
   1315     ldrh  r4, [r0, #2]!
   1316     ldrh  r10, [r0, #2]!
   1317     ldrh  r11, [r0, #2]!
   1318     cmp   r3, r1
   1319     beq   match_0
   1320     cmp   r4, r1
   1321     beq   match_1
   1322     cmp   r10, r1
   1323     beq   match_2
   1324     cmp   r11, r1
   1325     beq   match_3
   1326     subs  r8, #4
   1327     bge   indexof_loop4
   1328 
   1329 indexof_remainder:
   1330     adds    r8, #4
   1331     beq     indexof_nomatch
   1332 
   1333 indexof_loop1:
   1334     ldrh  r3, [r0, #2]!
   1335     cmp   r3, r1
   1336     beq   match_3
   1337     subs  r8, #1
   1338     bne   indexof_loop1
   1339 
   1340 indexof_nomatch:
   1341     mov   r0, #-1
   1342     bx    lr
   1343 
   1344 match_0:
   1345     sub   r0, #6
   1346     sub   r0, r7
   1347     asr   r0, r0, #1
   1348     bx    lr
   1349 match_1:
   1350     sub   r0, #4
   1351     sub   r0, r7
   1352     asr   r0, r0, #1
   1353     bx    lr
   1354 match_2:
   1355     sub   r0, #2
   1356     sub   r0, r7
   1357     asr   r0, r0, #1
   1358     bx    lr
   1359 match_3:
   1360     sub   r0, r7
   1361     asr   r0, r0, #1
   1362     bx    lr
   1363 
   1364 /* ------------------------------ */
   1365     .balign 4
   1366     .global dvmCompiler_TEMPLATE_INTERPRET
   1367 dvmCompiler_TEMPLATE_INTERPRET:
   1368 /* File: armv5te/TEMPLATE_INTERPRET.S */
   1369     /*
    1370      * This handler transfers control to the interpreter without performing
    1371      * any lookups.  It may be called either as part of a normal chaining
    1372      * operation, or from the transition code in header.S.  We distinguish
    1373      * the two cases by looking at the link register.  If called from a
    1374      * translation chain, it points 3 bytes before the stored Dalvik PC.
   1375      * On entry:
   1376      *    lr - if NULL:
   1377      *        r1 - the Dalvik PC to begin interpretation.
   1378      *    else
   1379      *        [lr, #3] contains Dalvik PC to begin interpretation
   1380      *    rSELF - pointer to thread
   1381      *    rFP - Dalvik frame pointer
   1382      */
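    /*
     * Reference-only sketch of the dispatch below, written as if
     * dvmJitToInterpPunt were a C function taking the Dalvik PC (it is an
     * assembly entry point; the PC actually travels in r0, and dPCInR1 is
     * just a placeholder for the value handed over in r1):
     *
     *   const u2 *dPC = (lr != NULL) ? *(const u2 **)(lr + 3)  // from a chain
     *                                : dPCInR1;                // from header.S
     *   dvmJitToInterpPunt(dPC);                               // never returns
     */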
   1383     cmp     lr, #0
   1384 #if defined(WORKAROUND_CORTEX_A9_745320)
   1385     /* Don't use conditional loads if the HW defect exists */
   1386     beq     101f
   1387     ldr     r1,[lr, #3]
   1388 101:
   1389 #else
   1390     ldrne   r1,[lr, #3]
   1391 #endif
   1392     ldr     r2, .LinterpPunt
   1393     mov     r0, r1                       @ set Dalvik PC
   1394     bx      r2
   1395     @ doesn't return
   1396 
   1397 .LinterpPunt:
   1398     .word   dvmJitToInterpPunt
   1399 
   1400 /* ------------------------------ */
   1401     .balign 4
   1402     .global dvmCompiler_TEMPLATE_MONITOR_ENTER
   1403 dvmCompiler_TEMPLATE_MONITOR_ENTER:
   1404 /* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
   1405     /*
   1406      * Call out to the runtime to lock an object.  Because this thread
   1407      * may have been suspended in THREAD_MONITOR state and the Jit's
   1408      * translation cache subsequently cleared, we cannot return directly.
   1409      * Instead, unconditionally transition to the interpreter to resume.
   1410      *
   1411      * On entry:
   1412      *    r0 - self pointer
    1413      *    r1 - the object (which has already been null-checked by the caller)
   1414      *    r4 - the Dalvik PC of the following instruction.
   1415      */
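    /*
     * In rough C terms (a sketch for reference, not assembled):
     *
     *   self->inJitCodeCache = NULL;    // record that we won't return here
     *   dvmLockObject(self, obj);       // may block / suspend this thread
     *   dvmJitToInterpNoChain();        // resume at rPC in the interpreter
     */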
   1416     ldr     r2, .LdvmLockObject
   1417     mov     r3, #0                       @ Record that we're not returning
   1418     str     r3, [r0, #offThread_inJitCodeCache]
   1419     blx     r2                           @ dvmLockObject(self, obj)
   1420     ldr     r2, .LdvmJitToInterpNoChain
   1421     @ Bail to interpreter - no chain [note - r4 still contains rPC]
   1422 #if defined(WITH_JIT_TUNING)
   1423     mov     r0, #kHeavyweightMonitor
   1424 #endif
   1425     bx      r2
   1426 
   1427 /* ------------------------------ */
   1428     .balign 4
   1429     .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
   1430 dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
   1431 /* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
   1432     /*
   1433      * To support deadlock prediction, this version of MONITOR_ENTER
   1434      * will always call the heavyweight dvmLockObject, check for an
   1435      * exception and then bail out to the interpreter.
   1436      *
   1437      * On entry:
   1438      *    r0 - self pointer
    1439      *    r1 - the object (which has already been null-checked by the caller)
   1440      *    r4 - the Dalvik PC of the following instruction.
   1441      *
   1442      */
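    /*
     * Debug-path sketch in rough C (reference only, not assembled;
     * handleException stands for the .LhandleException code in footer.S):
     *
     *   self->inJitCodeCache = NULL;
     *   dvmLockObject(self, obj);
     *   if (self->exception != NULL)
     *       handleException(dPC - 1);   // back up one code unit to the
     *                                   // monitor-enter instruction
     *   else
     *       dvmJitToInterpNoChain();    // resume at rPC in the interpreter
     */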
   1443     ldr     r2, .LdvmLockObject
   1444     mov     r3, #0                       @ Record that we're not returning
   1445     str     r3, [r0, #offThread_inJitCodeCache]
   1446     blx     r2             @ dvmLockObject(self, obj)
   1447     @ test for exception
   1448     ldr     r1, [rSELF, #offThread_exception]
   1449     cmp     r1, #0
   1450     beq     1f
   1451     ldr     r2, .LhandleException
   1452     sub     r0, r4, #2     @ roll dPC back to this monitor instruction
   1453     bx      r2
   1454 1:
   1455     @ Bail to interpreter - no chain [note - r4 still contains rPC]
   1456 #if defined(WITH_JIT_TUNING)
   1457     mov     r0, #kHeavyweightMonitor
   1458 #endif
   1459     ldr     pc, .LdvmJitToInterpNoChain
   1460 
   1461 /* ------------------------------ */
   1462     .balign 4
   1463     .global dvmCompiler_TEMPLATE_PERIODIC_PROFILING
   1464 dvmCompiler_TEMPLATE_PERIODIC_PROFILING:
   1465 /* File: armv5te/TEMPLATE_PERIODIC_PROFILING.S */
   1466     /*
   1467      * Increment profile counter for this trace, and decrement
   1468      * sample counter.  If sample counter goes below zero, turn
   1469      * off profiling.
   1470      *
   1471      * On entry
    1472      * (lr-11) is the address of the pointer to the counter.  Note: that
    1473      *    pointer actually lives 10 bytes before the return target, but
    1474      *    because we arrive from Thumb mode, lr has its low bit set.
   1475      */
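    /*
     * Equivalent logic in C (a sketch for reference, not assembled;
     * counterPtr is the word fetched from (lr-11)):
     *
     *   int count = *counterPtr + 1;             // bump this trace's counter
     *   int left  = *self->pProfileCountdown - 1;
     *   if (left < 0) {
     *       dvmJitTraceProfilingOff();           // sample budget exhausted
     *   } else {
     *       *counterPtr = count;
     *       *self->pProfileCountdown = left;
     *   }
     */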
   1476      ldr    r0, [lr,#-11]
   1477      ldr    r1, [rSELF, #offThread_pProfileCountdown]
   1478      ldr    r2, [r0]                    @ get counter
   1479      ldr    r3, [r1]                    @ get countdown timer
    1480      add    r2, #1                      @ increment profile count
    1481      subs   r3, #1                      @ decrement countdown; <0 => disable
   1482      blt    .LTEMPLATE_PERIODIC_PROFILING_disable_profiling
   1483      str    r2, [r0]
   1484      str    r3, [r1]
   1485      bx     lr
   1486 
   1487 .LTEMPLATE_PERIODIC_PROFILING_disable_profiling:
   1488      mov    r4, lr                     @ preserve lr
   1489      ldr    r0, .LdvmJitTraceProfilingOff
   1490      blx    r0
   1491      bx     r4
   1492 
   1493 /* ------------------------------ */
   1494     .balign 4
   1495     .global dvmCompiler_TEMPLATE_RETURN_PROF
   1496 dvmCompiler_TEMPLATE_RETURN_PROF:
   1497 /* File: armv5te/TEMPLATE_RETURN_PROF.S */
   1498 #define TEMPLATE_INLINE_PROFILING
   1499 /* File: armv5te/TEMPLATE_RETURN.S */
   1500     /*
   1501      * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
    1502      * If the stored value in returnAddr is non-zero, the caller was
    1503      * compiled by the JIT, so return to the address in the code cache
    1504      * following the invoke instruction.  Otherwise
   1505      * return to the special dvmJitToInterpNoChain entry point.
   1506      */
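    /*
     * Unwind logic in rough C (a sketch for reference, not assembled; field
     * names mirror the StackSaveArea/Thread offsets used below):
     *
     *   StackSaveArea *saveArea = SAVEAREA_FROM_FP(fp);
     *   u1 *prevFp = saveArea->prevFrame;
     *   void *ret  = saveArea->returnAddr;        // JIT'ed caller, or NULL
     *   const Method *caller = SAVEAREA_FROM_FP(prevFp)->method;
     *   if (caller == NULL) {                     // break frame?
     *       dvmMterpStdBail(self);                // leave compiled code
     *   } else if (ret != NULL && self->breakFlags == 0) {
     *       ((void (*)(void))ret)();              // chain into caller's code
     *   } else {
     *       dvmJitToInterpNoChainNoProfile();     // resume in the interpreter
     *   }
     */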
   1507 #if defined(TEMPLATE_INLINE_PROFILING)
   1508     stmfd   sp!, {r0-r2,lr}             @ preserve live registers
   1509     mov     r0, r6
   1510     @ r0=rSELF
   1511     mov     lr, pc
   1512     ldr     pc, .LdvmFastMethodTraceExit
   1513     ldmfd   sp!, {r0-r2,lr}             @ restore live registers
   1514 #endif
   1515     SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
   1516     ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
   1517     ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
   1518     ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
   1519 #if !defined(WITH_SELF_VERIFICATION)
   1520     ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
   1521 #else
   1522     mov     r9, #0                      @ disable chaining
   1523 #endif
   1524     ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
   1525                                         @ r2<- method we're returning to
   1526     cmp     r2, #0                      @ break frame?
   1527 #if !defined(WITH_SELF_VERIFICATION)
   1528     beq     1f                          @ bail to interpreter
   1529 #else
   1530     blxeq   lr                          @ punt to interpreter and compare state
   1531 #endif
   1532     ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
   1533     mov     rFP, r10                    @ publish new FP
   1534     ldr     r10, [r2, #offMethod_clazz] @ r10<- method->clazz
   1535 
   1536     str     r2, [rSELF, #offThread_method]@ self->method = newSave->method
   1537     ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
   1538     str     rFP, [rSELF, #offThread_curFrame] @ curFrame = fp
   1539     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes)
   1540     str     r0, [rSELF, #offThread_methodClassDex]
   1541     cmp     r8, #0                      @ check the break flags
   1542     movne   r9, #0                      @ clear the chaining cell address
   1543     str     r9, [rSELF, #offThread_inJitCodeCache] @ in code cache or not
   1544     cmp     r9, #0                      @ chaining cell exists?
   1545     blxne   r9                          @ jump to the chaining cell
   1546 #if defined(WITH_JIT_TUNING)
   1547     mov     r0, #kCallsiteInterpreted
   1548 #endif
   1549     mov     pc, r1                      @ callsite is interpreted
   1550 1:
   1551     mov     r0, #0
   1552     str     r0, [rSELF, #offThread_inJitCodeCache] @ reset inJitCodeCache
   1553     stmia   rSELF, {rPC, rFP}           @ SAVE_PC_FP_TO_SELF()
   1554     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
   1555     mov     r0, rSELF                   @ Expecting rSELF in r0
   1556     blx     r2                          @ exit the interpreter
   1557 
   1558 #undef TEMPLATE_INLINE_PROFILING
   1559 
   1560 /* ------------------------------ */
   1561     .balign 4
   1562     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF
   1563 dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT_PROF:
   1564 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT_PROF.S */
   1565 #define TEMPLATE_INLINE_PROFILING
   1566 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
   1567     /*
   1568      * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
   1569      * into rPC then jump to dvmJitToInterpNoChain to dispatch the
   1570      * runtime-resolved callee.
   1571      */
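    /*
     * Frame geometry set up below, in rough C (a sketch for reference, not
     * assembled; field names mirror the offsets used in the code):
     *
     *   u4 *newFp = (u4 *)SAVEAREA_FROM_FP(fp) - methodToCall->registersSize;
     *   StackSaveArea *newSave = SAVEAREA_FROM_FP(newFp);
     *   u1 *bottom = (u1 *)newSave - methodToCall->outsSize * sizeof(u4);
     *   if (bottom < self->interpStackEnd)
     *       return;                           // interpreter raises overflow
     *   newSave->prevFrame  = fp;
     *   newSave->returnAddr = returnCell;     // Thumb address, low bit set
     *   newSave->savedPc    = dalvikCallsite;
     *   newSave->method     = methodToCall;
     *
     * After that, breakFlags and ACC_NATIVE decide whether to bail, take the
     * native path, or publish the new frame and dispatch the callee.
     */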
   1572     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
   1573     ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
   1574     ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
   1575     ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
   1576     ldrb    r8, [rSELF, #offThread_breakFlags] @ r8<- breakFlags
   1577     add     r3, r1, #1  @ Thumb addr is odd
   1578     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
   1579     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
   1580     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
   1581     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
   1582     cmp     r10, r9                     @ bottom < interpStackEnd?
   1583     bxlo    lr                          @ return to raise stack overflow excep.
   1584     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
   1585     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
   1586     ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
   1587     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
   1588     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
   1589     ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
   1590 
   1591 
   1592     @ set up newSaveArea
   1593     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
   1594     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
   1595     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
   1596     cmp     r8, #0                      @ breakFlags != 0
   1597     bxne    lr                          @ bail to the interpreter
   1598     tst     r10, #ACC_NATIVE
   1599 #if !defined(WITH_SELF_VERIFICATION)
   1600     bne     .LinvokeNative
   1601 #else
   1602     bxne    lr                          @ bail to the interpreter
   1603 #endif
   1604 
   1605     ldr     r10, .LdvmJitToInterpTraceSelectNoChain
   1606     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
   1607 
   1608     @ Update "thread" values for the new method
   1609     str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
   1610     str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
   1611     mov     rFP, r1                         @ fp = newFp
   1612     str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
   1613 #if defined(TEMPLATE_INLINE_PROFILING)
   1614     stmfd   sp!, {r0-r3}                    @ preserve r0-r3
   1615     mov     r1, r6
   1616     @ r0=methodToCall, r1=rSELF
   1617     mov     lr, pc
   1618     ldr     pc, .LdvmFastMethodTraceEnter
   1619     ldmfd   sp!, {r0-r3}                    @ restore r0-r3
   1620 #endif
   1621 
   1622     @ Start executing the callee
   1623 #if defined(WITH_JIT_TUNING)
   1624     mov     r0, #kInlineCacheMiss
   1625 #endif
   1626     mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
   1627 
   1628 #undef TEMPLATE_INLINE_PROFILING
   1629 
   1630 /* ------------------------------ */
   1631     .balign 4
   1632     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF
   1633 dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN_PROF:
   1634 /* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN_PROF.S */
   1635 #define TEMPLATE_INLINE_PROFILING
   1636 /* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
   1637     /*
   1638      * For monomorphic callsite, setup the Dalvik frame and return to the
   1639      * Thumb code through the link register to transfer control to the callee
   1640      * method through a dedicated chaining cell.
   1641      */
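    /*
     * Control-flow sketch relative to the NO_OPT template above (hedged,
     * reference only; branchTo() and setUpFrame() are illustrative names,
     * not real functions):
     *
     *   if (stackOverflow || self->breakFlags)
     *       branchTo(lr + 2);            // chaining cell's punt-to-interp pad
     *   setUpFrame(newFp, methodToCall); // same stores as the NO_OPT case
     *   branchTo(lr);                    // chaining cell branches to callee
     */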
   1642     @ r0 = methodToCall, r1 = returnCell, r2 = methodToCall->outsSize
   1643     @ rPC = dalvikCallsite, r7 = methodToCall->registersSize
   1644     @ methodToCall is guaranteed to be non-native
   1645 .LinvokeChainProf:
   1646     ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
   1647     ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
   1648     add     r3, r1, #1  @ Thumb addr is odd
   1649     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
   1650     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
   1651     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
   1652     add     r12, lr, #2                 @ setup the punt-to-interp address
   1653     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
   1654     cmp     r10, r9                     @ bottom < interpStackEnd?
   1655     bxlo    r12                         @ return to raise stack overflow excep.
   1656     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
   1657     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
   1658     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
   1659     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
   1660 
   1661     @ set up newSaveArea
   1662     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
   1663     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
   1664     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
   1665     cmp     r8, #0                      @ breakFlags != 0
   1666     bxne    r12                         @ bail to the interpreter
   1667 
   1668     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
   1669 
   1670     @ Update "thread" values for the new method
   1671     str     r0, [rSELF, #offThread_method]    @ self->method = methodToCall
   1672     str     r3, [rSELF, #offThread_methodClassDex] @ self->methodClassDex = ...
   1673     mov     rFP, r1                         @ fp = newFp
   1674     str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = newFp
   1675 #if defined(TEMPLATE_INLINE_PROFILING)
   1676     stmfd   sp!, {r0-r2,lr}             @ preserve clobbered live registers
   1677     mov     r1, r6
   1678     @ r0=methodToCall, r1=rSELF
   1679     mov     lr, pc
   1680     ldr     pc, .LdvmFastMethodTraceEnter
   1681     ldmfd   sp!, {r0-r2,lr}             @ restore registers
   1682 #endif
   1683 
   1684     bx      lr                              @ return to the callee-chaining cell
   1685 
   1686 #undef TEMPLATE_INLINE_PROFILING
   1687 
   1688 /* ------------------------------ */
   1689     .balign 4
   1690     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF
   1691 dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF:
   1692 /* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN_PROF.S */
   1693 #define TEMPLATE_INLINE_PROFILING
   1694 /* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
   1695     /*
   1696      * For polymorphic callsite, check whether the cached class pointer matches
   1697      * the current one. If so setup the Dalvik frame and return to the
   1698      * Thumb code through the link register to transfer control to the callee
   1699      * method through a dedicated chaining cell.
   1700      *
   1701      * The predicted chaining cell is declared in ArmLIR.h with the
   1702      * following layout:
   1703      *
   1704      *  typedef struct PredictedChainingCell {
   1705      *      u4 branch;
   1706      *      const ClassObject *clazz;
   1707      *      const Method *method;
   1708      *      u4 counter;
   1709      *  } PredictedChainingCell;
   1710      *
   1711      * Upon returning to the callsite:
   1712      *    - lr  : to branch to the chaining cell
   1713      *    - lr+2: to punt to the interpreter
    1714      *    - lr+4: to fully resolve the callee and possibly rechain.
   1715      *            r3 <- class
   1716      *            r9 <- counter
   1717      */
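    /*
     * The class check below, in rough C (a sketch for reference, not
     * assembled; "cell" is the PredictedChainingCell described above):
     *
     *   if (this->clazz == cell->clazz) {       // prediction hit
     *       methodToCall = cell->method;
     *       goto invokeChain;                   // falls into .LinvokeChainProf
     *   }
     *   // Miss: compute the rechain budget, then return to lr+4 so the
     *   // landing pad can re-resolve the callee and possibly rechain.
     *   count  = (cell->clazz == NULL) ? 0 : --self->icRechainCount;
     *   vtable = this->clazz->vtable;
     */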
   1718     @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
   1719     ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
   1720     ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
   1721     ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
    1722     ldr     r9, [rSELF, #offThread_icRechainCount] @ r9 <- shared rechainCount
   1723     cmp     r3, r8          @ predicted class == actual class?
   1724 #if defined(WITH_JIT_TUNING)
   1725     ldr     r7, .LdvmICHitCount
   1726 #if defined(WORKAROUND_CORTEX_A9_745320)
   1727     /* Don't use conditional loads if the HW defect exists */
   1728     bne     101f
   1729     ldr     r10, [r7, #0]
   1730 101:
   1731 #else
   1732     ldreq   r10, [r7, #0]
   1733 #endif
   1734     add     r10, r10, #1
   1735     streq   r10, [r7, #0]
   1736 #endif
   1737     ldreqh  r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
   1738     ldreqh  r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
   1739     beq     .LinvokeChainProf   @ predicted chain is valid
   1740     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
   1741     cmp     r8, #0          @ initialized class or not
   1742     moveq   r1, #0
   1743     subne   r1, r9, #1      @ count--
   1744     strne   r1, [rSELF, #offThread_icRechainCount]  @ write back to thread
   1745     add     lr, lr, #4      @ return to fully-resolve landing pad
   1746     /*
   1747      * r1 <- count
   1748      * r2 <- &predictedChainCell
   1749      * r3 <- this->class
   1750      * r4 <- dPC
   1751      * r7 <- this->class->vtable
   1752      */
   1753     bx      lr
   1754 
   1755 #undef TEMPLATE_INLINE_PROFILING
   1756 
   1757 /* ------------------------------ */
   1758     .balign 4
   1759     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF
   1760 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE_PROF:
   1761 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE_PROF.S */
   1762 #define TEMPLATE_INLINE_PROFILING
   1763 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
   1764     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
   1765     @ r7 = methodToCall->registersSize
   1766     ldr     r9, [rSELF, #offThread_interpStackEnd]    @ r9<- interpStackEnd
   1767     ldrb    r8, [rSELF, #offThread_breakFlags]        @ r8<- breakFlags
   1768     add     r3, r1, #1  @ Thumb addr is odd
   1769     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
   1770     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
   1771     SAVEAREA_FROM_FP(r10, r1)           @ r10<- stack save area
   1772     cmp     r10, r9                     @ bottom < interpStackEnd?
   1773     bxlo    lr                          @ return to raise stack overflow excep.
   1774     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
   1775     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
   1776     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)]
   1777 
   1778     @ set up newSaveArea
   1779     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)]
   1780     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)]
   1781     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)]
   1782     cmp     r8, #0                      @ breakFlags != 0
   1783     ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc
   1784 #if !defined(WITH_SELF_VERIFICATION)
   1785     bxne    lr                          @ bail to the interpreter
   1786 #else
   1787     bx      lr                          @ bail to interpreter unconditionally
   1788 #endif
   1789 
   1790     @ go ahead and transfer control to the native code
    1791     ldr     r9, [rSELF, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
   1792     mov     r2, #0
   1793     str     r1, [rSELF, #offThread_curFrame]   @ curFrame = newFp
   1794     str     r2, [rSELF, #offThread_inJitCodeCache] @ not in the jit code cache
   1795     str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
   1796                                         @ newFp->localRefCookie=top
   1797     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
   1798 
   1799     mov     r2, r0                        @ arg2<- methodToCall
   1800     mov     r0, r1                        @ arg0<- newFP
   1801     add     r1, rSELF, #offThread_retval  @ arg1<- &retval
   1802     mov     r3, rSELF                     @ arg3<- self
   1803 #if defined(TEMPLATE_INLINE_PROFILING)
   1804     @ r2=methodToCall, r6=rSELF
   1805     stmfd   sp!, {r2,r6}                @ to be consumed after JNI return
   1806     stmfd   sp!, {r0-r3}                @ preserve r0-r3
   1807     mov     r0, r2
   1808     mov     r1, r6
   1809     @ r0=JNIMethod, r1=rSELF
   1810     mov     lr, pc
   1811     ldr     pc, .LdvmFastMethodTraceEnter
   1812     ldmfd   sp!, {r0-r3}                @ restore r0-r3
   1813 #endif
   1814 
   1815     blx     r8                          @ off to the native code
   1816 
   1817 #if defined(TEMPLATE_INLINE_PROFILING)
   1818     ldmfd   sp!, {r0-r1}                @ restore r2 and r6
   1819     @ r0=JNIMethod, r1=rSELF
   1820     mov     lr, pc
   1821     ldr     pc, .LdvmFastNativeMethodTraceExit
   1822 #endif
   1823     @ native return; r10=newSaveArea
   1824     @ equivalent to dvmPopJniLocals
   1825     ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
   1826     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
   1827     ldr     r1, [rSELF, #offThread_exception] @ check for exception
   1828     str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = fp
   1829     cmp     r1, #0                      @ null?
   1830     str     r0, [rSELF, #offThread_jniLocal_topCookie] @ new top <- old top
   1831     ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)]
   1832 
   1833     @ r0 = dalvikCallsitePC
   1834     bne     .LhandleException           @ no, handle exception
   1835 
   1836     str     r2, [rSELF, #offThread_inJitCodeCache] @ set the mode properly
   1837     cmp     r2, #0                      @ return chaining cell still exists?
   1838     bxne    r2                          @ yes - go ahead
   1839 
   1840     @ continue executing the next instruction through the interpreter
   1841     ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
   1842     add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
   1843 #if defined(WITH_JIT_TUNING)
   1844     mov     r0, #kCallsiteInterpreted
   1845 #endif
   1846     mov     pc, r1
   1847 
   1848 #undef TEMPLATE_INLINE_PROFILING
   1849 
   1850     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
   1851 /* File: armv5te/footer.S */
   1852 /*
   1853  * ===========================================================================
   1854  *  Common subroutines and data
   1855  * ===========================================================================
   1856  */
   1857 
   1858     .text
   1859     .align  2
   1860 .LinvokeNative:
   1861     @ Prep for the native call
   1862     @ r1 = newFP, r0 = methodToCall
   1863     mov     r2, #0
    1864     ldr     r9, [rSELF, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
   1865     str     r2, [rSELF, #offThread_inJitCodeCache] @ not in jit code cache
   1866     str     r1, [rSELF, #offThread_curFrame]   @ curFrame = newFp
   1867     str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
   1868                                         @ newFp->localRefCookie=top
   1869     ldrh    lr, [rSELF, #offThread_subMode]
   1870     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
   1871 
   1872     mov     r2, r0                      @ r2<- methodToCall
   1873     mov     r0, r1                      @ r0<- newFP
   1874     add     r1, rSELF, #offThread_retval  @ r1<- &retval
   1875     mov     r3, rSELF                   @ arg3<- self
   1876     ands    lr, #kSubModeMethodTrace
   1877     beq     121f                        @ hop if not profiling
   1878     @ r2: methodToCall, r6: rSELF
   1879     stmfd   sp!, {r2,r6}
   1880     stmfd   sp!, {r0-r3}
   1881     mov     r0, r2
   1882     mov     r1, r6
   1883     mov     lr, pc
   1884     ldr     pc, .LdvmFastMethodTraceEnter
   1885     ldmfd   sp!, {r0-r3}
   1886 
   1887     mov     lr, pc
   1888     ldr     pc, [r2, #offMethod_nativeFunc]
   1889 
   1890     ldmfd   sp!, {r0-r1}
   1891     mov     lr, pc
   1892     ldr     pc, .LdvmFastNativeMethodTraceExit
   1893     b       212f
   1894 121:
   1895     mov     lr, pc
   1896     ldr     pc, [r2, #offMethod_nativeFunc]
   1897 212:
   1898 
   1899     @ native return; r10=newSaveArea
   1900     @ equivalent to dvmPopJniLocals
   1901     ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
   1902     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
   1903     ldr     r1, [rSELF, #offThread_exception] @ check for exception
   1904     str     rFP, [rSELF, #offThread_curFrame]  @ curFrame = fp
   1905     cmp     r1, #0                      @ null?
   1906     str     r0, [rSELF, #offThread_jniLocal_topCookie] @ new top <- old top
   1907     ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
   1908 
   1909     @ r0 = dalvikCallsitePC
   1910     bne     .LhandleException           @ no, handle exception
   1911 
   1912     str     r2, [rSELF, #offThread_inJitCodeCache] @ set the new mode
   1913     cmp     r2, #0                      @ return chaining cell still exists?
   1914     bxne    r2                          @ yes - go ahead
   1915 
   1916     @ continue executing the next instruction through the interpreter
   1917     ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
   1918     add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
   1919 #if defined(WITH_JIT_TUNING)
   1920     mov     r0, #kCallsiteInterpreted
   1921 #endif
   1922     mov     pc, r1
   1923 
   1924 /*
   1925  * On entry:
   1926  * r0  Faulting Dalvik PC
   1927  */
   1928 .LhandleException:
   1929 #if defined(WITH_SELF_VERIFICATION)
   1930     ldr     pc, .LdeadFood @ should not see this under self-verification mode
   1931 .LdeadFood:
   1932     .word   0xdeadf00d
   1933 #endif
   1934     mov     r2, #0
   1935     str     r2, [rSELF, #offThread_inJitCodeCache] @ in interpreter land
   1936     ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func
   1937     ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above
   1938     mov     rPC, r0                 @ reload the faulting Dalvik address
   1939     mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
   1940 
   1941     .align  2
   1942 .LdvmAsmInstructionStart:
   1943     .word   dvmAsmInstructionStart
   1944 .LdvmJitToInterpNoChainNoProfile:
   1945     .word   dvmJitToInterpNoChainNoProfile
   1946 .LdvmJitToInterpTraceSelectNoChain:
   1947     .word   dvmJitToInterpTraceSelectNoChain
   1948 .LdvmJitToInterpNoChain:
   1949     .word   dvmJitToInterpNoChain
   1950 .LdvmMterpStdBail:
   1951     .word   dvmMterpStdBail
   1952 .LdvmMterpCommonExceptionThrown:
   1953     .word   dvmMterpCommonExceptionThrown
   1954 .LdvmLockObject:
   1955     .word   dvmLockObject
   1956 .LdvmJitTraceProfilingOff:
   1957     .word   dvmJitTraceProfilingOff
   1958 #if defined(WITH_JIT_TUNING)
   1959 .LdvmICHitCount:
   1960     .word   gDvmICHitCount
   1961 #endif
   1962 #if defined(WITH_SELF_VERIFICATION)
   1963 .LdvmSelfVerificationMemOpDecode:
   1964     .word   dvmSelfVerificationMemOpDecode
   1965 #endif
   1966 .LdvmFastMethodTraceEnter:
   1967     .word   dvmFastMethodTraceEnter
   1968 .LdvmFastNativeMethodTraceExit:
   1969     .word   dvmFastNativeMethodTraceExit
   1970 .LdvmFastMethodTraceExit:
   1971     .word   dvmFastMethodTraceExit
   1972 .L__aeabi_cdcmple:
   1973     .word   __aeabi_cdcmple
   1974 .L__aeabi_cfcmple:
   1975     .word   __aeabi_cfcmple
   1976 
   1977     .global dmvCompilerTemplateEnd
   1978 dmvCompilerTemplateEnd:
   1979 
   1980 #endif /* WITH_JIT */
   1981 
   1982