Home | History | Annotate | Download | only in out
      1 /*
      2  * This file was generated automatically by gen-template.py for 'armv7-a'.
      3  *
      4  * --> DO NOT EDIT <--
      5  */
      6 
      7 /* File: armv5te/header.S */
      8 /*
      9  * Copyright (C) 2008 The Android Open Source Project
     10  *
     11  * Licensed under the Apache License, Version 2.0 (the "License");
     12  * you may not use this file except in compliance with the License.
     13  * You may obtain a copy of the License at
     14  *
     15  *      http://www.apache.org/licenses/LICENSE-2.0
     16  *
     17  * Unless required by applicable law or agreed to in writing, software
     18  * distributed under the License is distributed on an "AS IS" BASIS,
     19  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     20  * See the License for the specific language governing permissions and
     21  * limitations under the License.
     22  */
     23 
     24 #if defined(WITH_JIT)
     25 
     26 /*
     27  * ARMv5 definitions and declarations.
     28  */
     29 
     30 /*
     31 ARM EABI general notes:
     32 
     33 r0-r3 hold first 4 args to a method; they are not preserved across method calls
     34 r4-r8 are available for general use
     35 r9 is given special treatment in some situations, but not for us
     36 r10 (sl) seems to be generally available
     37 r11 (fp) is used by gcc (unless -fomit-frame-pointer is set)
     38 r12 (ip) is scratch -- not preserved across method calls
     39 r13 (sp) should be managed carefully in case a signal arrives
     40 r14 (lr) must be preserved
     41 r15 (pc) can be tinkered with directly
     42 
     43 r0 holds returns of <= 4 bytes
     44 r0-r1 hold returns of 8 bytes, low word in r0
     45 
     46 Callee must save/restore r4+ (except r12) if it modifies them.
     47 
     48 Stack is "full descending".  Only the arguments that don't fit in the first 4
     49 registers are placed on the stack.  "sp" points at the first stacked argument
     50 (i.e. the 5th arg).
     51 
     52 VFP: single-precision results in s0, double-precision results in d0.
     53 
     54 In the EABI, "sp" must be 64-bit aligned on entry to a function, and any
     55 64-bit quantities (long long, double) must be 64-bit aligned.
     56 */
     57 
     58 /*
     59 JIT and ARM notes:
     60 
     61 The following registers have fixed assignments:
     62 
     63   reg nick      purpose
     64   r5  rFP       interpreted frame pointer, used for accessing locals and args
     65   r6  rGLUE     MterpGlue pointer
     66 
     67 The following registers have fixed assignments in mterp but are scratch
     68 registers in compiled code
     69 
     70   reg nick      purpose
     71   r4  rPC       interpreted program counter, used for fetching instructions
     72   r7  rINST     first 16-bit code unit of current instruction
     73   r8  rIBASE    interpreted instruction base pointer, used for computed goto
     74 
     75 Macros are provided for common operations.  Each macro MUST emit only
     76 one instruction to make instruction-counting easier.  They MUST NOT alter
     77 unspecified registers or condition codes.
     78 */
     79 
     80 /* single-purpose registers, given names for clarity (roles per the "JIT and ARM notes" table above) */
     81 #define rPC     r4      /* interpreted program counter; scratch in compiled code */
     82 #define rFP     r5      /* interpreted frame pointer, used for locals and args */
     83 #define rGLUE   r6      /* MterpGlue pointer */
     84 #define rINST   r7      /* first 16-bit code unit of current instruction; scratch in compiled code */
     85 #define rIBASE  r8      /* interpreted instruction base pointer; scratch in compiled code */
     86 
     87 /*
     88  * Given a frame pointer in _fpreg, compute the address of its stack save
     89  * area into _reg (_reg <- _fpreg - sizeofStackSaveArea; one instruction).
     90  * In C this is "((StackSaveArea*)(_fp) -1)".
     91  */
     92 #define SAVEAREA_FROM_FP(_reg, _fpreg) \
     93     sub     _reg, _fpreg, #sizeofStackSaveArea
     94 
     95 #define EXPORT_PC() \
     96     str     rPC, [rFP, #(-sizeofStackSaveArea + offStackSaveArea_currentPc)] /* store rPC into the currentPc field of the save area below rFP (one instruction) */
     97 
     98 /*
     99  * This is a #include, not a %include, because we want the C pre-processor
    100  * to expand the macros into assembler assignment statements.
    101  */
    102 #include "../../../mterp/common/asm-constants.h"
    103 
    104 /* File: armv5te-vfp/platform.S */
    105 /*
    106  * ===========================================================================
    107  *  CPU-version-specific defines and utility
    108  * ===========================================================================
    109  */
    110 
    111 /*
    112  * Macro for "MOV LR,PC / LDR PC,xxx", which is not allowed pre-ARMv5.
    113  * Jump to the subroutine whose address is loaded from the source operand,
    114  * after setting LR so the callee can return to the code following the macro.
    115  * May modify IP and LR.  (This expansion itself writes only LR and PC.)
    116  */
    117 .macro  LDR_PC_LR source
    118     mov     lr, pc                      @ lr<- return address (in ARM state pc reads as this insn + 8)
    119     ldr     pc, \source                 @ pc<- subroutine address loaded from the operand
    120 .endm
    121 
    122 
    123     .global dvmCompilerTemplateStart    @ exported label placed just before the first template
    124     .type   dvmCompilerTemplateStart, %function
    125     .text                               @ the templates below are emitted into the text section
    126
    127 dvmCompilerTemplateStart:
    128 
    129 /* ------------------------------ */
    130     .balign 4
    131     .global dvmCompiler_TEMPLATE_CMP_LONG
    132 dvmCompiler_TEMPLATE_CMP_LONG:
    133 /* File: armv5te/TEMPLATE_CMP_LONG.S */
    134     /*
    135      * Compare two 64-bit values.  Puts 0, 1, or -1 into r0 based on the
    136      * results of the comparison, then returns through lr.
    137      *
    138      * On entry: r1:r0 = first operand (vBB), r3:r2 = second operand (vCC),
    139      * high word in the odd register.  No loads are done in this template; in
    140      * practice many comparisons are resolved by looking only at the high words.
    141      *
    142      * If we just wanted to set condition flags, we could do this:
    143      *  subs    ip, r0, r2
    144      *  sbcs    ip, r1, r3
    145      *  subeqs  ip, r0, r2
    146      * Leaving { <0, 0, >0 } in ip.  However, we have to set it to a specific
    147      * integer value, which we can do with 2 conditional mov/mvn instructions
    148      * (set 1, set -1; if they're equal we already have 0 in ip), giving
    149      * us a constant 5-cycle path plus a branch at the end to the
    150      * instruction epilogue code.  The multi-compare approach below needs
    151      * 2 or 3 cycles + branch if the high word doesn't match, 6 + branch
    152      * in the worst case (the 64-bit values are equal).
    153      */
    154     /* cmp-long vAA, vBB, vCC */
    155     cmp     r1, r3                      @ compare high words (vBB+1, vCC+1)
    156     blt     .LTEMPLATE_CMP_LONG_less            @ signed compare on high part
    157     bgt     .LTEMPLATE_CMP_LONG_greater         @ high words differ: result is 1
    158     subs    r0, r0, r2                  @ r0<- r0 - r2 (high words equal; sets flags)
    159     bxeq     lr                          @ low words equal too: return 0 already in r0
    160     bhi     .LTEMPLATE_CMP_LONG_greater         @ unsigned compare on low part
    161 .LTEMPLATE_CMP_LONG_less:
    162     mvn     r0, #0                      @ r0<- -1
    163     bx      lr
    164 .LTEMPLATE_CMP_LONG_greater:
    165     mov     r0, #1                      @ r0<- 1
    166     bx      lr
    167 
    168 /* ------------------------------ */
    169     .balign 4
    170     .global dvmCompiler_TEMPLATE_RETURN
    171 dvmCompiler_TEMPLATE_RETURN:
    172 /* File: armv5te/TEMPLATE_RETURN.S */
    173     /*
    174      * Unwind a frame from the Dalvik stack for compiled OP_RETURN_XXX.
    175      * If the stored value in returnAddr is non-zero and suspendCount is zero,
    176      * the caller is compiled by the JIT, so return to the
    177      * address in the code cache following the invoke instruction. Otherwise
    178      * continue at .LdvmJitToInterpNoChainNoProfile; a break frame bails via .LdvmMterpStdBail.
    179      */
    180     SAVEAREA_FROM_FP(r0, rFP)           @ r0<- saveArea (old)
    181     ldr     r10, [r0, #offStackSaveArea_prevFrame] @ r10<- saveArea->prevFrame
    182     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    183     ldr     rPC, [r0, #offStackSaveArea_savedPc] @ rPC<- saveArea->savedPc
    184 #if !defined(WITH_SELF_VERIFICATION)
    185     ldr     r9,  [r0, #offStackSaveArea_returnAddr] @ r9<- chaining cell ret
    186 #else
    187     mov     r9, #0                      @ disable chaining
    188 #endif
    189     ldr     r2, [r10, #(offStackSaveArea_method - sizeofStackSaveArea)]
    190                                         @ r2<- method we're returning to
    191     ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
    192     cmp     r2, #0                      @ break frame? (method is null)
    193 #if !defined(WITH_SELF_VERIFICATION)
    194     beq     1f                          @ bail to interpreter
    195 #else
    196     blxeq   lr                          @ punt to interpreter and compare state
    197 #endif
    198     ldr     r1, .LdvmJitToInterpNoChainNoProfile @ defined in footer.S
    199     mov     rFP, r10                    @ publish new FP
    200     ldrne   r10, [r2, #offMethod_clazz] @ r10<- method->clazz (flags still from cmp r2 above)
    201     ldr     r8, [r8]                    @ r8<- suspendCount
    202
    203     str     r2, [rGLUE, #offGlue_method]@ glue->method = newSave->method
    204     ldr     r0, [r10, #offClassObject_pDvmDex] @ r0<- method->clazz->pDvmDex
    205     str     rFP, [r3, #offThread_curFrame] @ self->curFrame = fp
    206     add     rPC, rPC, #6                @ publish new rPC (advance 6 bytes = 3 code units)
    207     str     r0, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = pDvmDex
    208     cmp     r8, #0                      @ check the suspendCount
    209     movne   r9, #0                      @ clear the chaining cell address
    210     str     r9, [r3, #offThread_inJitCodeCache] @ in code cache or not
    211     cmp     r9, #0                      @ chaining cell exists?
    212     blxne   r9                          @ jump to the chaining cell
    213 #if defined(WITH_JIT_TUNING)
    214     mov     r0, #kCallsiteInterpreted   @ r0<- kCallsiteInterpreted for the entry point below
    215 #endif
    216     mov     pc, r1                      @ callsite is interpreted
    217 1:
    218     stmia   rGLUE, {rPC, rFP}           @ SAVE_PC_FP_TO_GLUE()
    219     ldr     r2, .LdvmMterpStdBail       @ defined in footer.S
    220     mov     r1, #0                      @ changeInterp = false
    221     mov     r0, rGLUE                   @ Expecting rGLUE in r0
    222     blx     r2                          @ exit the interpreter
    223 
    224 /* ------------------------------ */
    225     .balign 4
    226     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT
    227 dvmCompiler_TEMPLATE_INVOKE_METHOD_NO_OPT:
    228 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NO_OPT.S */
    229     /*
    230      * For polymorphic callsites - setup the Dalvik frame and load Dalvik PC
    231      * into rPC then jump to dvmJitToInterpTraceSelectNoChain to dispatch the
    232      * runtime-resolved callee.  Returns through lr on stack overflow or pending suspend.
    233      */
    234     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    235     ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    236     ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    237     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    238     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    239     add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
    240     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    241     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    242     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
    243     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    244     ldr     r8, [r8]                    @ r8<- suspendCount (int)
    245     cmp     r10, r9                     @ bottom < interpStackEnd?
    246     bxlo    lr                          @ return to raise stack overflow excep.
    247     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    248     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    249     ldr     r10, [r0, #offMethod_accessFlags] @ r10<- methodToCall->accessFlags
    250     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] @ old save area currentPc = dalvikCallsite
    251     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] @ newSave->savedPc = dalvikCallsite
    252     ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
    253
    254
    255     @ set up newSaveArea
    256     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] @ newSave->prevFrame = old fp
    257     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] @ newSave->returnAddr = returnCell + 1
    258     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] @ newSave->method = methodToCall
    259     cmp     r8, #0                      @ suspendCount != 0
    260     bxne    lr                          @ bail to the interpreter
    261     tst     r10, #ACC_NATIVE            @ is methodToCall native?
    262 #if !defined(WITH_SELF_VERIFICATION)
    263     bne     .LinvokeNative
    264 #else
    265     bxne    lr                          @ bail to the interpreter
    266 #endif
    267
    268     ldr     r10, .LdvmJitToInterpTraceSelectNoChain @ r10<- entry point jumped to below
    269     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    270     ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
    271
    272     @ Update "glue" values for the new method
    273     str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
    274     str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
    275     mov     rFP, r1                         @ fp = newFp
    276     str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
    277
    278     @ Start executing the callee
    279 #if defined(WITH_JIT_TUNING)
    280     mov     r0, #kInlineCacheMiss           @ r0<- kInlineCacheMiss for the entry point below
    281 #endif
    282     mov     pc, r10                         @ dvmJitToInterpTraceSelectNoChain
    283 
    284 /* ------------------------------ */
    285     .balign 4
    286     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN
    287 dvmCompiler_TEMPLATE_INVOKE_METHOD_CHAIN:
    288 /* File: armv5te/TEMPLATE_INVOKE_METHOD_CHAIN.S */
    289     /*
    290      * For monomorphic callsite, setup the Dalvik frame and return to the
    291      * Thumb code through the link register to transfer control to the callee
    292      * method through a dedicated chaining cell.  Punts return to lr+2 (held in r12).
    293      */
    294     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    295     @ methodToCall is guaranteed to be non-native
    296 .LinvokeChain:
    297     ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    298     ldrh    r2, [r0, #offMethod_outsSize]  @ r2<- methodToCall->outsSize
    299     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    300     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    301     add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
    302     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    303     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    304     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
    305     add     r12, lr, #2                 @ setup the punt-to-interp address
    306     sub     r10, r10, r2, lsl #2        @ r10<- bottom (newsave - outsSize)
    307     ldr     r8, [r8]                    @ r8<- suspendCount (int)
    308     cmp     r10, r9                     @ bottom < interpStackEnd?
    309     bxlo    r12                         @ return to raise stack overflow excep.
    310     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    311     ldr     r9, [r0, #offMethod_clazz]      @ r9<- method->clazz
    312     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] @ old save area currentPc = dalvikCallsite
    313     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] @ newSave->savedPc = dalvikCallsite
    314     ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
    315
    316
    317     @ set up newSaveArea
    318     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] @ newSave->prevFrame = old fp
    319     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] @ newSave->returnAddr = returnCell + 1
    320     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] @ newSave->method = methodToCall
    321     cmp     r8, #0                      @ suspendCount != 0
    322     bxne    r12                         @ bail to the interpreter
    323
    324     ldr     r3, [r9, #offClassObject_pDvmDex] @ r3<- method->clazz->pDvmDex
    325     ldr     r2, [rGLUE, #offGlue_self]      @ r2<- glue->self
    326
    327     @ Update "glue" values for the new method
    328     str     r0, [rGLUE, #offGlue_method]    @ glue->method = methodToCall
    329     str     r3, [rGLUE, #offGlue_methodClassDex] @ glue->methodClassDex = ...
    330     mov     rFP, r1                         @ fp = newFp
    331     str     rFP, [r2, #offThread_curFrame]  @ self->curFrame = newFp
    332
    333     bx      lr                              @ return to the callee-chaining cell
    334 
    335 /* ------------------------------ */
    336     .balign 4
    337     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN
    338 dvmCompiler_TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN:
    339 /* File: armv5te/TEMPLATE_INVOKE_METHOD_PREDICTED_CHAIN.S */
    340     /*
    341      * For polymorphic callsite, check whether the cached class pointer matches
    342      * the current one. If so setup the Dalvik frame and return to the
    343      * Thumb code through the link register to transfer control to the callee
    344      * method through a dedicated chaining cell.
    345      *
    346      * The predicted chaining cell is declared in ArmLIR.h with the
    347      * following layout:
    348      *
    349      *  typedef struct PredictedChainingCell {
    350      *      u4 branch;
    351      *      const ClassObject *clazz;
    352      *      const Method *method;
    353      *      u4 counter;
    354      *  } PredictedChainingCell;
    355      *
    356      * Upon returning to the callsite:
    357      *    - lr  : to branch to the chaining cell
    358      *    - lr+2: to punt to the interpreter
    359      *    - lr+4: to fully resolve the callee and may rechain.
    360      *            r3 <- class
    361      *            r9 <- counter (rechain count as loaded; r1 holds the updated count)
    362      */
    363     @ r0 = this, r1 = returnCell, r2 = predictedChainCell, rPC = dalvikCallsite
    364     ldr     r3, [r0, #offObject_clazz]  @ r3 <- this->class
    365     ldr     r8, [r2, #4]    @ r8 <- predictedChainCell->clazz
    366     ldr     r0, [r2, #8]    @ r0 <- predictedChainCell->method
    367     ldr     r9, [rGLUE, #offGlue_icRechainCount]   @ r9 <- shared rechainCount
    368     cmp     r3, r8          @ predicted class == actual class?
    369 #if defined(WITH_JIT_TUNING)
    370     ldr     r7, .LdvmICHitCount         @ r7 <- word loaded from .LdvmICHitCount, used as the counter address
    371     ldreq   r10, [r7, #0]               @ on a hit: r10 <- current hit count
    372     add     r10, r10, #1                @ unconditional add; flags untouched, stored only on a hit
    373     streq   r10, [r7, #0]               @ on a hit: write back the incremented count
    374 #endif
    375     beq     .LinvokeChain   @ predicted chain is valid
    376     ldr     r7, [r3, #offClassObject_vtable] @ r7 <- this->class->vtable
    377     cmp     r8, #0          @ cell clazz initialized (non-zero) or not
    378     moveq   r1, #0          @ cell clazz is zero: r1 <- 0
    379     subne   r1, r9, #1      @ count--
    380     strne   r1, [rGLUE, #offGlue_icRechainCount]   @ write back to InterpState
    381     add     lr, lr, #4      @ return to fully-resolve landing pad
    382     /*
    383      * r1 <- count
    384      * r2 <- &predictedChainCell
    385      * r3 <- this->class
    386      * r4 <- dPC
    387      * r7 <- this->class->vtable
    388      */
    389     bx      lr
    390 
    391 /* ------------------------------ */
    392     .balign 4
    393     .global dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE
    394 dvmCompiler_TEMPLATE_INVOKE_METHOD_NATIVE:
    395 /* File: armv5te/TEMPLATE_INVOKE_METHOD_NATIVE.S */
    396     @ r0 = methodToCall, r1 = returnCell, rPC = dalvikCallsite
    397     ldrh    r7, [r0, #offMethod_registersSize]  @ r7<- methodToCall->regsSize
    398     ldr     r9, [rGLUE, #offGlue_interpStackEnd]    @ r9<- interpStackEnd
    399     ldr     r8, [rGLUE, #offGlue_pSelfSuspendCount] @ r8<- &suspendCount
    400     add     r3, r1, #1  @ r3<- returnCell + 1 (Thumb addr is odd)
    401     SAVEAREA_FROM_FP(r1, rFP)           @ r1<- stack save area
    402     sub     r1, r1, r7, lsl #2          @ r1<- newFp (old savearea - regsSize)
    403     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new save area (outsSize is not subtracted here)
    404     ldr     r8, [r8]                    @ r8<- suspendCount (int)
    405     cmp     r10, r9                     @ bottom < interpStackEnd?
    406     bxlo    lr                          @ return to raise stack overflow excep.
    407     @ r1 = newFP, r0 = methodToCall, r3 = returnCell, rPC = dalvikCallsite
    408     str     rPC, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] @ old save area currentPc = dalvikCallsite
    409     str     rPC, [r1, #(offStackSaveArea_savedPc - sizeofStackSaveArea)] @ newSave->savedPc = dalvikCallsite
    410     ldr     rPC, [r0, #offMethod_insns]     @ rPC<- methodToCall->insns
    411
    412
    413     @ set up newSaveArea
    414     str     rFP, [r1, #(offStackSaveArea_prevFrame - sizeofStackSaveArea)] @ newSave->prevFrame = old fp
    415     str     r3, [r1, #(offStackSaveArea_returnAddr - sizeofStackSaveArea)] @ newSave->returnAddr = returnCell + 1
    416     ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
    417     str     r0, [r1, #(offStackSaveArea_method - sizeofStackSaveArea)] @ newSave->method = methodToCall
    418     cmp     r8, #0                      @ suspendCount != 0
    419     ldr     r8, [r0, #offMethod_nativeFunc] @ r8<- method->nativeFunc (flags from cmp are kept)
    420 #if !defined(WITH_SELF_VERIFICATION)
    421     bxne    lr                          @ bail to the interpreter
    422 #else
    423     bx      lr                          @ bail to interpreter unconditionally
    424 #endif
    425
    426     @ go ahead and transfer control to the native code
    427     ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
    428     mov     r2, #0                      @ r2<- 0, stored to inJitCodeCache below
    429     str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
    430     str     r2, [r3, #offThread_inJitCodeCache] @ not in the jit code cache
    431     str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
    432                                         @ newFp->localRefCookie=top
    433     mov     r9, r3                      @ r9<- glue->self (preserve)
    434     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
    435
    436     mov     r2, r0                      @ r2<- methodToCall
    437     mov     r0, r1                      @ r0<- newFP
    438     add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
    439
    440     blx     r8                          @ off to the native code
    441
    442     @ native return; r9=self, r10=newSaveArea
    443     @ equivalent to dvmPopJniLocals
    444     ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
    445     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
    446     ldr     r1, [r9, #offThread_exception] @ check for exception
    447     str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
    448     cmp     r1, #0                      @ null?
    449     str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
    450     ldr     r0, [rFP, #(offStackSaveArea_currentPc - sizeofStackSaveArea)] @ r0<- currentPc exported before the call
    451
    452     @ r0 = dalvikCallsitePC
    453     bne     .LhandleException           @ exception is not null: handle it
    454
    455     str     r2, [r9, #offThread_inJitCodeCache] @ set the mode properly
    456     cmp     r2, #0                      @ return chaining cell still exists?
    457     bxne    r2                          @ yes - go ahead
    458
    459     @ continue executing the next instruction through the interpreter
    460     ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
    461     add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
    462 #if defined(WITH_JIT_TUNING)
    463     mov     r0, #kCallsiteInterpreted   @ r0<- kCallsiteInterpreted for the entry point below
    464 #endif
    465     mov     pc, r1                      @ continue at dvmJitToInterpTraceSelectNoChain
    466 
    467 /* ------------------------------ */
    468     .balign 4
    469     .global dvmCompiler_TEMPLATE_MUL_LONG
    470 dvmCompiler_TEMPLATE_MUL_LONG:
    471 /* File: armv5te/TEMPLATE_MUL_LONG.S */
    472     /*
    473      * Signed 64-bit integer multiply.
    474      *
    475      * For JIT: op1 in r0/r1, op2 in r2/r3, return in r0/r1
    476      * (low word in the even register).  Also writes r2, r9, r10 and ip.
    477      * Consider WXxYZ (r1r0 x r3r2) with a long multiply:
    478      *        WX
    479      *      x YZ
    480      *  --------
    481      *     ZW ZX
    482      *  YW YX
    483      *
    484      * The low word of the result holds ZX, the high word holds
    485      * (ZW+YX) + (the high overflow from ZX).  YW doesn't matter because
    486      * it doesn't fit in the low 64 bits.
    487      *
    488      * Unlike most ARM math operations, multiply instructions have
    489      * restrictions on using the same register more than once (Rd and Rm
    490      * cannot be the same).
    491      */
    492     /* mul-long vAA, vBB, vCC */
    493     mul     ip, r2, r1                  @  ip<- ZxW
    494     umull   r9, r10, r2, r0             @  r9/r10 <- ZxX
    495     mla     r2, r0, r3, ip              @  r2<- YxX + (ZxW)
    496     add     r10, r2, r10                @  r10<- r10 + low(ZxW + (YxX))
    497     mov     r0,r9                       @  r0<- low word of the product
    498     mov     r1,r10                      @  r1<- high word of the product
    499     bx      lr
    500 
    501 /* ------------------------------ */
    502     .balign 4
    503     .global dvmCompiler_TEMPLATE_SHL_LONG
    504 dvmCompiler_TEMPLATE_SHL_LONG:
    505 /* File: armv5te/TEMPLATE_SHL_LONG.S */
    506     /*
    507      * Long integer shift.  This is different from the generic 32/64-bit
    508      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    509      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    510      * 6 bits.  Value in r0/r1 (low/high), distance in r2; result in r0/r1; writes r3, ip.
    511      */
    512     /* shl-long vAA, vBB, vCC */
    513     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    514     mov     r1, r1, asl r2              @  r1<- r1 << r2
    515     rsb     r3, r2, #32                 @  r3<- 32 - r2
    516     orr     r1, r1, r0, lsr r3          @  r1<- r1 | (r0 >>> (32-r2))
    517     subs    ip, r2, #32                 @  ip<- r2 - 32
    518     movpl   r1, r0, asl ip              @  if r2 >= 32, r1<- r0 << (r2-32)
    519     mov     r0, r0, asl r2              @  r0<- r0 << r2
    520     bx      lr
    521 
    522 /* ------------------------------ */
    523     .balign 4
    524     .global dvmCompiler_TEMPLATE_SHR_LONG
    525 dvmCompiler_TEMPLATE_SHR_LONG:
    526 /* File: armv5te/TEMPLATE_SHR_LONG.S */
    527     /*
    528      * Long integer shift.  This is different from the generic 32/64-bit
    529      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    530      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    531      * 6 bits.  Value in r0/r1 (low/high), distance in r2; result in r0/r1; writes r3, ip.
    532      */
    533     /* shr-long vAA, vBB, vCC */
    534     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    535     mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
    536     rsb     r3, r2, #32                 @  r3<- 32 - r2
    537     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    538     subs    ip, r2, #32                 @  ip<- r2 - 32
    539     movpl   r0, r1, asr ip              @  if r2 >= 32, r0<-r1 >> (r2-32)
    540     mov     r1, r1, asr r2              @  r1<- r1 >> r2
    541     bx      lr
    542 
    543 /* ------------------------------ */
    544     .balign 4
    545     .global dvmCompiler_TEMPLATE_USHR_LONG
    546 dvmCompiler_TEMPLATE_USHR_LONG:
    547 /* File: armv5te/TEMPLATE_USHR_LONG.S */
    548     /*
    549      * Long integer shift.  This is different from the generic 32/64-bit
    550      * binary operations because vAA/vBB are 64-bit but vCC (the shift
    551      * distance) is 32-bit.  Also, Dalvik requires us to ignore all but the low
    552      * 6 bits.  Value in r0/r1 (low/high), distance in r2; result in r0/r1; writes r3, ip.
    553      */
    554     /* ushr-long vAA, vBB, vCC */
    555     and     r2, r2, #63                 @ r2<- r2 & 0x3f
    556     mov     r0, r0, lsr r2              @  r0<- r0 >>> r2
    557     rsb     r3, r2, #32                 @  r3<- 32 - r2
    558     orr     r0, r0, r1, asl r3          @  r0<- r0 | (r1 << (32-r2))
    559     subs    ip, r2, #32                 @  ip<- r2 - 32
    560     movpl   r0, r1, lsr ip              @  if r2 >= 32, r0<-r1 >>> (r2-32)
    561     mov     r1, r1, lsr r2              @  r1<- r1 >>> r2
    562     bx      lr
    563 
    564 /* ------------------------------ */
    565     .balign 4
    566     .global dvmCompiler_TEMPLATE_ADD_FLOAT_VFP
    567 dvmCompiler_TEMPLATE_ADD_FLOAT_VFP:
    568 /* File: armv5te-vfp/TEMPLATE_ADD_FLOAT_VFP.S */
    569 /* File: armv5te-vfp/fbinop.S */
    570     /*
    571      * Generic 32-bit floating point operation.  Provide an "instr" line that
    572      * specifies an instruction that performs s2 = s0 op s1.
    573      * This instance: [r0] <- [r1] + [r2] (single precision); writes s0-s2.
    574      * On entry:
    575      *     r0 = target dalvik register address
    576      *     r1 = op1 address
    577      *     r2 = op2 address
    578      */
    579      flds    s0,[r1]                    @ s0<- op1
    580      flds    s1,[r2]                    @ s1<- op2
    581      fadds   s2, s0, s1                 @ s2<- s0 + s1
    582      fsts    s2,[r0]                    @ store result to the target address
    583      bx      lr
    584 
    585 
    586 /* ------------------------------ */
    587     .balign 4
    588     .global dvmCompiler_TEMPLATE_SUB_FLOAT_VFP
    589 dvmCompiler_TEMPLATE_SUB_FLOAT_VFP:
    590 /* File: armv5te-vfp/TEMPLATE_SUB_FLOAT_VFP.S */
    591 /* File: armv5te-vfp/fbinop.S */
    592     /*
    593      * Generic 32-bit floating point operation.  Provide an "instr" line that
    594      * specifies an instruction that performs s2 = s0 op s1.
    595      * This instance: [r0] <- [r1] - [r2] (single precision); writes s0-s2.
    596      * On entry:
    597      *     r0 = target dalvik register address
    598      *     r1 = op1 address
    599      *     r2 = op2 address
    600      */
    601      flds    s0,[r1]                    @ s0<- op1
    602      flds    s1,[r2]                    @ s1<- op2
    603      fsubs   s2, s0, s1                 @ s2<- s0 - s1
    604      fsts    s2,[r0]                    @ store result to the target address
    605      bx      lr
    606 
    607 
    608 /* ------------------------------ */
    609     .balign 4
    610     .global dvmCompiler_TEMPLATE_MUL_FLOAT_VFP
    611 dvmCompiler_TEMPLATE_MUL_FLOAT_VFP:
    612 /* File: armv5te-vfp/TEMPLATE_MUL_FLOAT_VFP.S */
    613 /* File: armv5te-vfp/fbinop.S */
    614     /*
    615      * Generic 32-bit floating point operation.  Provide an "instr" line that
    616      * specifies an instruction that performs s2 = s0 op s1.
    617      * This instance: [r0] <- [r1] * [r2] (single precision); writes s0-s2.
    618      * On entry:
    619      *     r0 = target dalvik register address
    620      *     r1 = op1 address
    621      *     r2 = op2 address
    622      */
    623      flds    s0,[r1]                    @ s0<- op1
    624      flds    s1,[r2]                    @ s1<- op2
    625      fmuls   s2, s0, s1                 @ s2<- s0 * s1
    626      fsts    s2,[r0]                    @ store result to the target address
    627      bx      lr
    628 
    629 
    630 /* ------------------------------ */
    631     .balign 4
    632     .global dvmCompiler_TEMPLATE_DIV_FLOAT_VFP
    633 dvmCompiler_TEMPLATE_DIV_FLOAT_VFP:
    634 /* File: armv5te-vfp/TEMPLATE_DIV_FLOAT_VFP.S */
    635 /* File: armv5te-vfp/fbinop.S */
    636     /*
    637      * Generic 32-bit floating point operation.  Provide an "instr" line that
    638      * specifies an instruction that performs s2 = s0 op s1.
    639      * This instance: [r0] <- [r1] / [r2] (single precision); writes s0-s2.
    640      * On entry:
    641      *     r0 = target dalvik register address
    642      *     r1 = op1 address
    643      *     r2 = op2 address
    644      */
    645      flds    s0,[r1]                    @ s0<- op1
    646      flds    s1,[r2]                    @ s1<- op2
    647      fdivs   s2, s0, s1                 @ s2<- s0 / s1
    648      fsts    s2,[r0]                    @ store result to the target address
    649      bx      lr
    650 
    651 
    652 /* ------------------------------ */
    653     .balign 4
    654     .global dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP
    655 dvmCompiler_TEMPLATE_ADD_DOUBLE_VFP:
    656 /* File: armv5te-vfp/TEMPLATE_ADD_DOUBLE_VFP.S */
    657 /* File: armv5te-vfp/fbinopWide.S */
    658     /*
    659      * Generic 64-bit floating point operation.  Provide an "instr" line that
    660      * specifies an instruction that performs d2 = d0 op d1.
    661      * This instance: [r0] <- [r1] + [r2] (double precision); writes d0-d2.
    662      * On entry:
    663      *     r0 = target dalvik register address
    664      *     r1 = op1 address
    665      *     r2 = op2 address
    666      */
    667      fldd    d0,[r1]                    @ d0<- op1
    668      fldd    d1,[r2]                    @ d1<- op2
    669      faddd   d2, d0, d1                 @ d2<- d0 + d1
    670      fstd    d2,[r0]                    @ store result to the target address
    671      bx      lr
    672 
    673 
    674 /* ------------------------------ */
    675     .balign 4
    676     .global dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP
    677 dvmCompiler_TEMPLATE_SUB_DOUBLE_VFP:
    678 /* File: armv5te-vfp/TEMPLATE_SUB_DOUBLE_VFP.S */
    679 /* File: armv5te-vfp/fbinopWide.S */
    680     /*
    681      * Generic 64-bit floating point operation.  Provide an "instr" line that
    682      * specifies an instruction that performs d2 = d0 op d1.  Here: subtract.
    683      *
    684      * On entry:
    685      *     r0 = target dalvik register address
    686      *     r1 = op1 address
    687      *     r2 = op2 address
    688      */
    689      fldd    d0,[r1]                    @ d0<- op1
    690      fldd    d1,[r2]                    @ d1<- op2
    691      fsubd   d2, d0, d1                 @ d2<- d0 - d1
    692      fstd    d2,[r0]                    @ target<- d2
    693      bx      lr
    694 
    695 
    696 /* ------------------------------ */
    697     .balign 4
    698     .global dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP
    699 dvmCompiler_TEMPLATE_MUL_DOUBLE_VFP:
    700 /* File: armv5te-vfp/TEMPLATE_MUL_DOUBLE_VFP.S */
    701 /* File: armv5te-vfp/fbinopWide.S */
    702     /*
    703      * Generic 64-bit floating point operation.  Provide an "instr" line that
    704      * specifies an instruction that performs d2 = d0 op d1.  Here: multiply.
    705      *
    706      * On entry:
    707      *     r0 = target dalvik register address
    708      *     r1 = op1 address
    709      *     r2 = op2 address
    710      */
    711      fldd    d0,[r1]                    @ d0<- op1
    712      fldd    d1,[r2]                    @ d1<- op2
    713      fmuld   d2, d0, d1                 @ d2<- d0 * d1
    714      fstd    d2,[r0]                    @ target<- d2
    715      bx      lr
    716 
    717 
    718 /* ------------------------------ */
    719     .balign 4
    720     .global dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP
    721 dvmCompiler_TEMPLATE_DIV_DOUBLE_VFP:
    722 /* File: armv5te-vfp/TEMPLATE_DIV_DOUBLE_VFP.S */
    723 /* File: armv5te-vfp/fbinopWide.S */
    724     /*
    725      * Generic 64-bit floating point operation.  Provide an "instr" line that
    726      * specifies an instruction that performs d2 = d0 op d1.  Here: divide.
    727      *
    728      * On entry:
    729      *     r0 = target dalvik register address
    730      *     r1 = op1 address
    731      *     r2 = op2 address
    732      */
    733      fldd    d0,[r1]                    @ d0<- op1 (dividend)
    734      fldd    d1,[r2]                    @ d1<- op2 (divisor)
    735      fdivd   d2, d0, d1                 @ d2<- d0 / d1
    736      fstd    d2,[r0]                    @ target<- d2
    737      bx      lr
    738 
    739 
    740 /* ------------------------------ */
    741     .balign 4
    742     .global dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP
    743 dvmCompiler_TEMPLATE_DOUBLE_TO_FLOAT_VFP:
    744 /* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_FLOAT_VFP.S */
    745 /* File: armv5te-vfp/funopNarrower.S */
    746     /*
    747      * Generic 64bit-to-32bit floating point unary operation.  Provide an
    748      * "instr" line that specifies an instruction that performs "s0 = op d0".
    749      * This expansion uses fcvtsd (double to single precision).
    750      * For: double-to-int, double-to-float
    751      *
    752      * On entry:
    753      *     r0 = target dalvik register address
    754      *     r1 = src dalvik register address
    755      */
    756     /* unop vA, vB */
    757     fldd    d0, [r1]                    @ d0<- vB (64-bit source)
    758     fcvtsd  s0, d0                              @ s0<- (float) d0
    759     fsts    s0, [r0]                    @ vA<- s0 (32-bit result)
    760     bx      lr
    761 
    762 
    763 /* ------------------------------ */
    764     .balign 4
    765     .global dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP
    766 dvmCompiler_TEMPLATE_DOUBLE_TO_INT_VFP:
    767 /* File: armv5te-vfp/TEMPLATE_DOUBLE_TO_INT_VFP.S */
    768 /* File: armv5te-vfp/funopNarrower.S */
    769     /*
    770      * Generic 64bit-to-32bit floating point unary operation.  Provide an
    771      * "instr" line that specifies an instruction that performs "s0 = op d0".
    772      * This expansion uses ftosizd (double to signed int, round toward zero).
    773      * For: double-to-int, double-to-float
    774      *
    775      * On entry:
    776      *     r0 = target dalvik register address
    777      *     r1 = src dalvik register address
    778      */
    779     /* unop vA, vB */
    780     fldd    d0, [r1]                    @ d0<- vB (64-bit source)
    781     ftosizd  s0, d0                              @ s0<- (int) d0
    782     fsts    s0, [r0]                    @ vA<- s0 (raw 32-bit integer bits)
    783     bx      lr
    784 
    785 
    786 /* ------------------------------ */
    787     .balign 4
    788     .global dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP
    789 dvmCompiler_TEMPLATE_FLOAT_TO_DOUBLE_VFP:
    790 /* File: armv5te-vfp/TEMPLATE_FLOAT_TO_DOUBLE_VFP.S */
    791 /* File: armv5te-vfp/funopWider.S */
    792     /*
    793      * Generic 32bit-to-64bit floating point unary operation.  Provide an
    794      * "instr" line that specifies an instruction that performs "d0 = op s0".
    795      * This expansion uses fcvtds (single to double precision).
    796      * For: int-to-double, float-to-double
    797      *
    798      * On entry:
    799      *     r0 = target dalvik register address
    800      *     r1 = src dalvik register address
    801      */
    802     /* unop vA, vB */
    803     flds    s0, [r1]                    @ s0<- vB (32-bit source)
    804     fcvtds  d0, s0                              @ d0<- (double) s0
    805     fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
    806     bx      lr
    807 
    808 
    809 /* ------------------------------ */
    810     .balign 4
    811     .global dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP
    812 dvmCompiler_TEMPLATE_FLOAT_TO_INT_VFP:
    813 /* File: armv5te-vfp/TEMPLATE_FLOAT_TO_INT_VFP.S */
    814 /* File: armv5te-vfp/funop.S */
    815     /*
    816      * Generic 32bit-to-32bit floating point unary operation.  Provide an
    817      * "instr" line that specifies an instruction that performs "s1 = op s0".
    818      * This expansion uses ftosizs (single to signed int, round toward zero).
    819      * For: float-to-int, int-to-float
    820      *
    821      * On entry:
    822      *     r0 = target dalvik register address
    823      *     r1 = src dalvik register address
    824      */
    825     /* unop vA, vB */
    826     flds    s0, [r1]                    @ s0<- vB
    827     ftosizs s1, s0                              @ s1<- (int) s0
    828     fsts    s1, [r0]                    @ vA<- s1 (raw 32-bit integer bits)
    829     bx      lr
    830 
    831 
    832 /* ------------------------------ */
    833     .balign 4
    834     .global dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP
    835 dvmCompiler_TEMPLATE_INT_TO_DOUBLE_VFP:
    836 /* File: armv5te-vfp/TEMPLATE_INT_TO_DOUBLE_VFP.S */
    837 /* File: armv5te-vfp/funopWider.S */
    838     /*
    839      * Generic 32bit-to-64bit floating point unary operation.  Provide an
    840      * "instr" line that specifies an instruction that performs "d0 = op s0".
    841      * This expansion uses fsitod (signed int to double precision).
    842      * For: int-to-double, float-to-double
    843      *
    844      * On entry:
    845      *     r0 = target dalvik register address
    846      *     r1 = src dalvik register address
    847      */
    848     /* unop vA, vB */
    849     flds    s0, [r1]                    @ s0<- vB (raw 32-bit integer bits)
    850     fsitod  d0, s0                              @ d0<- (double) s0
    851     fstd    d0, [r0]                    @ vA<- d0 (64-bit result)
    852     bx      lr
    853 
    854 
    855 /* ------------------------------ */
    856     .balign 4
    857     .global dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP
    858 dvmCompiler_TEMPLATE_INT_TO_FLOAT_VFP:
    859 /* File: armv5te-vfp/TEMPLATE_INT_TO_FLOAT_VFP.S */
    860 /* File: armv5te-vfp/funop.S */
    861     /*
    862      * Generic 32bit-to-32bit floating point unary operation.  Provide an
    863      * "instr" line that specifies an instruction that performs "s1 = op s0".
    864      * This expansion uses fsitos (signed int to single precision).
    865      * For: float-to-int, int-to-float
    866      *
    867      * On entry:
    868      *     r0 = target dalvik register address
    869      *     r1 = src dalvik register address
    870      */
    871     /* unop vA, vB */
    872     flds    s0, [r1]                    @ s0<- vB (raw 32-bit integer bits)
    873     fsitos  s1, s0                              @ s1<- (float) s0
    874     fsts    s1, [r0]                    @ vA<- s1
    875     bx      lr
    876 
    877 
    878 /* ------------------------------ */
    879     .balign 4
    880     .global dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP
    881 dvmCompiler_TEMPLATE_CMPG_DOUBLE_VFP:
    882 /* File: armv5te-vfp/TEMPLATE_CMPG_DOUBLE_VFP.S */
    883     /*
    884      * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
    885      * based on the results of the comparison; the fall-through default is 1.
    886      *
    887      * int compare(x, y) {
    888      *     if (x == y) {
    889      *         return 0;
    890      *     } else if (x < y) {
    891      *         return -1;
    892      *     } else if (x > y) {
    893      *         return 1;
    894      *     } else {                 // unordered (a NaN operand)
    895      *         return 1;
    896      *     }
    897      * }
    898      *
    899      * On entry:
    900      *    r0 = &op1 [vBB]
    901      *    r1 = &op2 [vCC]
    902      */
    903     /* op vAA, vBB, vCC */
    904     fldd    d0, [r0]                    @ d0<- vBB
    905     fldd    d1, [r1]                    @ d1<- vCC
    906     fcmpd  d0, d1                       @ compare (vBB, vCC)
    907     mov     r0, #1                      @ r0<- 1 (default: greater or NaN)
    908     fmstat                              @ export status flags
    909     mvnmi   r0, #0                      @ (less than) r0<- -1
    910     moveq   r0, #0                      @ (equal) r0<- 0
    911     bx      lr
    912 
    913 /* ------------------------------ */
    914     .balign 4
    915     .global dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP
    916 dvmCompiler_TEMPLATE_CMPL_DOUBLE_VFP:
    917 /* File: armv5te-vfp/TEMPLATE_CMPL_DOUBLE_VFP.S */
    918     /*
    919      * Compare two double-precision values.  Leaves 0, 1, or -1 in r0
    920      * based on the results of the comparison; the fall-through default is -1.
    921      *
    922      * int compare(x, y) {
    923      *     if (x == y) {
    924      *         return 0;
    925      *     } else if (x > y) {
    926      *         return 1;
    927      *     } else if (x < y) {
    928      *         return -1;
    929      *     } else {                 // unordered (a NaN operand)
    930      *         return -1;
    931      *     }
    932      * }
    933      * On entry:
    934      *    r0 = &op1 [vBB]
    935      *    r1 = &op2 [vCC]
    936      */
    937     /* op vAA, vBB, vCC */
    938     fldd    d0, [r0]                    @ d0<- vBB
    939     fldd    d1, [r1]                    @ d1<- vCC
    940     fcmpd   d0, d1                      @ quiet compare, as in the CMPG/float forms
    941     mvn     r0, #0                      @ r0<- -1 (default: less or NaN)
    942     fmstat                              @ export status flags
    943     movgt   r0, #1                      @ (greater than) r0<- 1
    944     moveq   r0, #0                      @ (equal) r0<- 0
    945     bx      lr
    946 
    947 /* ------------------------------ */
    948     .balign 4
    949     .global dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP
    950 dvmCompiler_TEMPLATE_CMPG_FLOAT_VFP:
    951 /* File: armv5te-vfp/TEMPLATE_CMPG_FLOAT_VFP.S */
    952     /*
    953      * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
    954      * based on the results of the comparison; the fall-through default is 1.
    955      *
    956      * int compare(x, y) {
    957      *     if (x == y) {
    958      *         return 0;
    959      *     } else if (x < y) {
    960      *         return -1;
    961      *     } else if (x > y) {
    962      *         return 1;
    963      *     } else {                 // unordered (a NaN operand)
    964      *         return 1;
    965      *     }
    966      * }
    967      * On entry:
    968      *    r0 = &op1 [vBB]
    969      *    r1 = &op2 [vCC]
    970      */
    971     /* op vAA, vBB, vCC */
    972     flds    s0, [r0]                    @ s0<- vBB
    973     flds    s1, [r1]                    @ s1<- vCC
    974     fcmps  s0, s1                      @ compare (vBB, vCC)
    975     mov     r0, #1                      @ r0<- 1 (default: greater or NaN)
    976     fmstat                              @ export status flags
    977     mvnmi   r0, #0                      @ (less than) r0<- -1
    978     moveq   r0, #0                      @ (equal) r0<- 0
    979     bx      lr
    980 
    981 /* ------------------------------ */
    982     .balign 4
    983     .global dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP
    984 dvmCompiler_TEMPLATE_CMPL_FLOAT_VFP:
    985 /* File: armv5te-vfp/TEMPLATE_CMPL_FLOAT_VFP.S */
    986     /*
    987      * Compare two single-precision values.  Leaves 0, 1, or -1 in r0
    988      * based on the results of the comparison; the fall-through default is -1.
    989      *
    990      * int compare(x, y) {
    991      *     if (x == y) {
    992      *         return 0;
    993      *     } else if (x > y) {
    994      *         return 1;
    995      *     } else if (x < y) {
    996      *         return -1;
    997      *     } else {                 // unordered (a NaN operand)
    998      *         return -1;
    999      *     }
   1000      * }
   1001      * On entry:
   1002      *    r0 = &op1 [vBB]
   1003      *    r1 = &op2 [vCC]
   1004      */
   1005     /* op vAA, vBB, vCC */
   1006     flds    s0, [r0]                    @ s0<- vBB
   1007     flds    s1, [r1]                    @ s1<- vCC
   1008     fcmps  s0, s1                      @ compare (vBB, vCC)
   1009     mvn     r0, #0                      @ r0<- -1 (default: less or NaN)
   1010     fmstat                              @ export status flags
   1011     movgt   r0, #1                      @ (greater than) r0<- 1
   1012     moveq   r0, #0                      @ (equal) r0<- 0
   1013     bx      lr
   1014 
   1015 /* ------------------------------ */
   1016     .balign 4
   1017     .global dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP
   1018 dvmCompiler_TEMPLATE_SQRT_DOUBLE_VFP:
   1019 /* File: armv5te-vfp/TEMPLATE_SQRT_DOUBLE_VFP.S */
   1020     /*
   1021      * 64-bit floating point vfp sqrt operation.
   1022      * If the result is a NaN (detected by comparing it with itself), bail
   1023      * out to the library sqrt routine with the original operand in r0/r1.
   1024      *
   1025      * On entry:
   1026      *     r2 src addr of op1
   1027      * On exit:
   1028      *     r0,r1 = res
   1029      */
   1030     fldd    d0, [r2]     @ d0<- operand
   1031     fsqrtd  d1, d0       @ d1<- sqrt(d0)
   1032     fcmpd   d1, d1       @ a value compares equal to itself unless it is NaN
   1033     fmstat               @ export status flags
   1034     fmrrd   r0, r1, d1   @ r0/r1<- result (fmrrd leaves the flags alone)
   1035     bxeq    lr   @ Result OK - return
   1036     ldr     r2, .Lsqrt   @ r2<- address of sqrt, from the literal below
   1037     fmrrd   r0, r1, d0   @ reload orig operand
   1038     bx      r2   @ tail call to sqrt library routine
   1039 
   1040 .Lsqrt:
   1041     .word   sqrt
   1042 
   1043 /* ------------------------------ */
   1044     .balign 4
   1045     .global dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON
   1046 dvmCompiler_TEMPLATE_THROW_EXCEPTION_COMMON:
   1047 /* File: armv5te/TEMPLATE_THROW_EXCEPTION_COMMON.S */
   1048     /*
   1049      * Throw an exception from JIT'ed code by branching to .LhandleException.
   1050      * On entry:
   1051      *    r0    Dalvik PC that raises the exception
   1052      */
   1053     b       .LhandleException           @ r0 is passed through untouched
   1054 
   1055 /* ------------------------------ */
   1056     .balign 4
   1057     .global dvmCompiler_TEMPLATE_MEM_OP_DECODE
   1058 dvmCompiler_TEMPLATE_MEM_OP_DECODE:
   1059 /* File: armv5te-vfp/TEMPLATE_MEM_OP_DECODE.S */
   1060 #if defined(WITH_SELF_VERIFICATION)
   1061     /*
   1062      * This handler encapsulates heap memory ops for selfVerification mode.
   1063      *
   1064      * The call to the handler is inserted prior to a heap memory operation.
   1065      * This handler saves d0-d15 and r0-r12,lr, calls the decode function with
   1066      * (lr, sp), then restores everything and returns through the reloaded lr.
   1067      * NOTE(review): skipping the memory op presumably relies on the decode
   1068      * function rewriting the saved lr slot on the stack - confirm in C code.
   1069      */
   1070     vpush   {d0-d15}                    @ save out all fp registers
   1071     push    {r0-r12,lr}                 @ save out all registers
   1072     mov     r0, lr                      @ arg0 <- link register
   1073     mov     r1, sp                      @ arg1 <- sp (base of saved r0-r12,lr)
   1074     ldr     r2, .LdvmSelfVerificationMemOpDecode @ defined in footer.S
   1075     blx     r2                          @ decode and handle the mem op
   1076     pop     {r0-r12,lr}                 @ restore all registers
   1077     vpop    {d0-d15}                    @ restore all fp registers
   1078     bx      lr                          @ return to compiled code
   1079 #endif
   1079 
   1080 /* ------------------------------ */
   1081     .balign 4
   1082     .global dvmCompiler_TEMPLATE_STRING_COMPARETO
   1083 dvmCompiler_TEMPLATE_STRING_COMPARETO:
   1084 /* File: armv5te/TEMPLATE_STRING_COMPARETO.S */
   1085     /*
   1086      * String's compareTo.
   1087      *
   1088      * Requires r0/r1 to have been previously checked for null.  Will
   1089      * return negative if this string is < comp, 0 if they are the
   1090      * same and positive if >.  Result is returned in r0.
   1091      *
   1092      * IMPORTANT NOTE:
   1093      *
   1094      * This code relies on hard-coded offsets for string objects, and must be
   1095      * kept in sync with definitions in UtfString.h.  See asm-constants.h
   1096      *
   1097      * On entry:
   1098      *    r0:   this object pointer
   1099      *    r1:   comp object pointer
   1100      *
   1101      */
   1102 
   1103     mov    r2, r0         @ this to r2, opening up r0 for return value
   1104     subs   r0, r2, r1     @ Same?
   1105     bxeq   lr             @ same object: return 0
   1106 
   1107     ldr    r4, [r2, #STRING_FIELDOFF_OFFSET]
   1108     ldr    r9, [r1, #STRING_FIELDOFF_OFFSET]
   1109     ldr    r7, [r2, #STRING_FIELDOFF_COUNT]
   1110     ldr    r10, [r1, #STRING_FIELDOFF_COUNT]
   1111     ldr    r2, [r2, #STRING_FIELDOFF_VALUE]
   1112     ldr    r1, [r1, #STRING_FIELDOFF_VALUE]
   1113 
   1114     /*
   1115      * At this point, we have:
   1116      *    value:  r2/r1
   1117      *    offset: r4/r9
   1118      *    count:  r7/r10
   1119      * We're going to compute
   1120      *    r11 <- countDiff
   1121      *    r10 <- minCount
   1122      */
   1123      subs  r11, r7, r10   @ r11<- this.count - comp.count
   1124      movls r10, r7        @ unsigned this.count <= comp.count: r10<- this.count
   1125 
   1126      /* Now, build pointers to the string data */
   1127      add   r2, r2, r4, lsl #1
   1128      add   r1, r1, r9, lsl #1
   1129      /*
   1130       * Note: data pointers point to previous element so we can use pre-index
   1131       * mode with base writeback.
   1132       */
   1133      add   r2, #16-2   @ offset to contents[-1]
   1134      add   r1, #16-2   @ offset to contents[-1]
   1135 
   1136      /*
   1137       * At this point we have:
   1138       *   r2: *this string data
   1139       *   r1: *comp string data
   1140       *   r10: iteration count for comparison
   1141       *   r11: value to return if the first part of the string is equal
   1142       *   r0: reserved for result
   1143       *   r3, r4, r7, r8, r9, r12 available for loading string data
   1144       */
   1145 
   1146     subs  r10, #2
   1147     blt   do_remainder2   @ fewer than 2 chars to compare
   1148 
   1149       /*
   1150        * Unroll the first two checks so we can quickly catch early mismatch
   1151        * on long strings (but preserve incoming alignment)
   1152        */
   1153 
   1154     ldrh  r3, [r2, #2]!
   1155     ldrh  r4, [r1, #2]!
   1156     ldrh  r7, [r2, #2]!
   1157     ldrh  r8, [r1, #2]!
   1158     subs  r0, r3, r4
   1159     subeqs  r0, r7, r8    @ second pair only decides if the first pair matched
   1160     bxne  lr              @ mismatch: r0 holds the char difference
   1161     cmp   r10, #28
   1162     bgt   do_memcmp16     @ more than 28 chars left: use __memcmp16
   1163     subs  r10, #3
   1164     blt   do_remainder
   1165 
   1166 loopback_triple:
   1167     ldrh  r3, [r2, #2]!
   1168     ldrh  r4, [r1, #2]!
   1169     ldrh  r7, [r2, #2]!
   1170     ldrh  r8, [r1, #2]!
   1171     ldrh  r9, [r2, #2]!
   1172     ldrh  r12,[r1, #2]!
   1173     subs  r0, r3, r4
   1174     subeqs  r0, r7, r8
   1175     subeqs  r0, r9, r12
   1176     bxne  lr              @ first differing pair sets the result in r0
   1177     subs  r10, #3
   1178     bge   loopback_triple
   1179 
   1180 do_remainder:
   1181     adds  r10, #3         @ undo the bias: r10<- chars left (0..2)
   1182     beq   returnDiff
   1183 
   1184 loopback_single:
   1185     ldrh  r3, [r2, #2]!
   1186     ldrh  r4, [r1, #2]!
   1187     subs  r0, r3, r4
   1188     bxne  lr
   1189     subs  r10, #1
   1190     bne     loopback_single
   1191 
   1192 returnDiff:
   1193     mov   r0, r11         @ common prefix equal: return countDiff
   1194     bx    lr
   1195 
   1196 do_remainder2:
   1197     adds  r10, #2         @ undo the bias: r10<- chars left (0..1)
   1198     bne   loopback_single
   1199     mov   r0, r11         @ nothing to compare: return countDiff
   1200     bx    lr
   1201 
   1202     /* Long string case */
   1203 do_memcmp16:
   1204     mov   r4, lr          @ r4<- our return address (lr is reused below)
   1205     ldr   lr, .Lmemcmp16
   1206     mov   r7, r11         @ r7<- countDiff, kept for after the call
   1207     add   r0, r2, #2      @ r0<- first unchecked char of this
   1208     add   r1, r1, #2      @ r1<- first unchecked char of comp
   1209     mov   r2, r10         @ r2<- chars left to compare
   1210     blx   lr
   1211     cmp   r0, #0
   1212     bxne  r4              @ nonzero from __memcmp16: return it as the result
   1213     mov   r0, r7          @ equal so far: return countDiff
   1214     bx    r4
   1215 
   1216 .Lmemcmp16:
   1217     .word __memcmp16
   1218 
   1219 /* ------------------------------ */
   1220     .balign 4
   1221     .global dvmCompiler_TEMPLATE_STRING_INDEXOF
   1222 dvmCompiler_TEMPLATE_STRING_INDEXOF:
   1223 /* File: armv5te/TEMPLATE_STRING_INDEXOF.S */
   1224     /*
   1225      * String's indexOf.
   1226      *
   1227      * Requires r0 to have been previously checked for null.  Will
   1228      * return index of match of r1 in r0, or -1 if there is no match.
   1229      *
   1230      * IMPORTANT NOTE:
   1231      *
   1232      * This code relies on hard-coded offsets for string objects, and must be
   1233      * kept in sync with definitions in UtfString.h.  See asm-constants.h
   1234      *
   1235      * On entry:
   1236      *    r0:   string object pointer
   1237      *    r1:   char to match
   1238      *    r2:   Starting offset in string data
   1239      */
   1240 
   1241     ldr    r7, [r0, #STRING_FIELDOFF_OFFSET]
   1242     ldr    r8, [r0, #STRING_FIELDOFF_COUNT]
   1243     ldr    r0, [r0, #STRING_FIELDOFF_VALUE]
   1244 
   1245     /*
   1246      * At this point, we have:
   1247      *    r0: value field of the string (its backing char array)
   1248      *    r1: char to match
   1249      *    r2: starting offset
   1250      *    r7: offset
   1251      *    r8: string length
   1252      */
   1253 
   1254      /* Build pointer to start of string data */
   1255      add   r0, #16
   1256      add   r0, r0, r7, lsl #1
   1257 
   1258      /* Save a copy of starting data in r7 */
   1259      mov   r7, r0
   1260 
   1261      /* Clamp start to [0..count] */
   1262      cmp   r2, #0
   1263      movlt r2, #0
   1264      cmp   r2, r8
   1265      movgt r2, r8
   1266 
   1267      /* Build pointer to start of data to compare and pre-bias */
   1268      add   r0, r0, r2, lsl #1
   1269      sub   r0, #2
   1270 
   1271      /* Compute iteration count */
   1272      sub   r8, r2
   1273 
   1274      /*
   1275       * At this point we have:
   1276       *   r0: start of data to test
   1277       *   r1: char to compare
   1278       *   r8: iteration count
   1279       *   r7: original start of string
   1280       *   r3, r4, r9, r10, r11, r12 available for loading string data
   1281       */
   1282 
   1283     subs  r8, #4
   1284     blt   indexof_remainder   @ fewer than 4 chars left
   1285 
   1286 indexof_loop4:
   1287     ldrh  r3, [r0, #2]!
   1288     ldrh  r4, [r0, #2]!
   1289     ldrh  r10, [r0, #2]!
   1290     ldrh  r11, [r0, #2]!      @ r0 now points at the 4th char of this group
   1291     cmp   r3, r1
   1292     beq   match_0
   1293     cmp   r4, r1
   1294     beq   match_1
   1295     cmp   r10, r1
   1296     beq   match_2
   1297     cmp   r11, r1
   1298     beq   match_3
   1299     subs  r8, #4
   1300     bge   indexof_loop4
   1301 
   1302 indexof_remainder:
   1303     adds    r8, #4            @ undo the bias: r8<- chars left (0..3)
   1304     beq     indexof_nomatch
   1305 
   1306 indexof_loop1:
   1307     ldrh  r3, [r0, #2]!
   1308     cmp   r3, r1
   1309     beq   match_3             @ r0 already points at the matching char
   1310     subs  r8, #1
   1311     bne   indexof_loop1
   1312 
   1313 indexof_nomatch:
   1314     mov   r0, #-1             @ not found
   1315     bx    lr
   1316 
   1317 match_0:
   1318     sub   r0, #6              @ back up 3 chars to the 1st char of the group
   1319     sub   r0, r7              @ byte distance from start of string data
   1320     asr   r0, r0, #1          @ bytes -> 16-bit char index
   1321     bx    lr
   1322 match_1:
   1323     sub   r0, #4              @ back up 2 chars to the 2nd char of the group
   1324     sub   r0, r7
   1325     asr   r0, r0, #1
   1326     bx    lr
   1327 match_2:
   1328     sub   r0, #2              @ back up 1 char to the 3rd char of the group
   1329     sub   r0, r7
   1330     asr   r0, r0, #1
   1331     bx    lr
   1332 match_3:
   1333     sub   r0, r7              @ r0 points at the match itself
   1334     asr   r0, r0, #1
   1335     bx    lr
   1336 
   1337 /* ------------------------------ */
   1338     .balign 4
   1339     .global dvmCompiler_TEMPLATE_INTERPRET
   1340 dvmCompiler_TEMPLATE_INTERPRET:
   1341 /* File: armv5te/TEMPLATE_INTERPRET.S */
   1342     /*
   1343      * This handler transfers control to the interpreter without performing
   1344      * any lookups.  It may be called either as part of a normal chaining
   1345      * operation, or from the transition code in header.S.  We distinguish
   1346      * the two cases by looking at the link register.  If called from a
   1347      * translation chain, it will point to the chaining Dalvik PC -3.
   1348      * On entry:
   1349      *    lr - if NULL:
   1350      *        r1 - the Dalvik PC to begin interpretation.
   1351      *    else
   1352      *        [lr, #3] contains Dalvik PC to begin interpretation
   1353      *    rGLUE - pointer to interpState
   1354      *    rFP - Dalvik frame pointer
   1355      */
   1356     cmp     lr, #0
   1357     ldrne   r1,[lr, #3]                  @ chained: fetch Dalvik PC via lr
   1358     ldr     r2, .LinterpPunt             @ r2<- &dvmJitToInterpPunt
   1359     mov     r0, r1                       @ set Dalvik PC
   1360     bx      r2
   1361     @ doesn't return
   1362 
   1363 .LinterpPunt:
   1364     .word   dvmJitToInterpPunt
   1365 
   1366 /* ------------------------------ */
   1367     .balign 4
   1368     .global dvmCompiler_TEMPLATE_MONITOR_ENTER
   1369 dvmCompiler_TEMPLATE_MONITOR_ENTER:
   1370 /* File: armv5te/TEMPLATE_MONITOR_ENTER.S */
   1371     /*
   1372      * Call out to the runtime to lock an object.  Because this thread
   1373      * may have been suspended in THREAD_MONITOR state and the Jit's
   1374      * translation cache subsequently cleared, we cannot return directly.
   1375      * Instead, unconditionally transition to the interpreter to resume.
   1376      *
   1377      * On entry:
   1378      *    r0 - self pointer
   1379      *    r1 - the object (which has already been null-checked by the caller)
   1380      *    r4 - the Dalvik PC of the following instruction.
   1381      */
   1382     ldr     r2, .LdvmLockObject
   1383     mov     r3, #0                       @ Record that we're not returning
   1384     str     r3, [r0, #offThread_inJitCodeCache]
   1385     blx     r2                           @ dvmLockObject(self, obj)
   1386     @ refresh Jit's on/off status
   1387     ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
   1388     ldr     r0, [r0]                     @ r0<- current JitProfTable pointer
   1389     ldr     r2, .LdvmJitToInterpNoChain
   1390     str     r0, [rGLUE, #offGlue_pJitProfTable]  @ cache it in the glue
   1391     @ Bail to interpreter - no chain [note - r4 still contains rPC]
   1392 #if defined(WITH_JIT_TUNING)
   1393     mov     r0, #kHeavyweightMonitor
   1394 #endif
   1395     bx      r2
   1396 
   1397 /* ------------------------------ */
   1397     .balign 4
   1398     .global dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG
   1399 dvmCompiler_TEMPLATE_MONITOR_ENTER_DEBUG:
   1400 /* File: armv5te/TEMPLATE_MONITOR_ENTER_DEBUG.S */
   1401     /*
   1402      * To support deadlock prediction, this version of MONITOR_ENTER
   1403      * will always call the heavyweight dvmLockObject, check for an
   1404      * exception and then bail out to the interpreter.
   1405      *
   1406      * On entry:
   1407      *    r0 - self pointer
   1408      *    r1 - the object (which has already been null-checked by the caller)
   1409      *    r4 - the Dalvik PC of the following instruction.
   1410      *
   1411      * If an exception is pending, control goes to .LhandleException instead.
   1412      */
   1413     ldr     r2, .LdvmLockObject
   1414     mov     r3, #0                       @ Record that we're not returning
   1415     str     r3, [r0, #offThread_inJitCodeCache]
   1416     blx     r2             @ dvmLockObject(self, obj)
   1417     @ refresh Jit's on/off status & test for exception
   1418     ldr     r0, [rGLUE, #offGlue_ppJitProfTable]
   1419     ldr     r1, [rGLUE, #offGlue_self]
   1420     ldr     r0, [r0]
   1421     ldr     r1, [r1, #offThread_exception]
   1422     str     r0, [rGLUE, #offGlue_pJitProfTable]
   1423     cmp     r1, #0
   1424     beq     1f
   1425     @ .LhandleException is a code label, not a literal word: branch to it
   1426     sub     r0, r4, #2     @ roll dPC back to this monitor instruction
   1427     b       .LhandleException
   1428 1:
   1429     @ Bail to interpreter - no chain [note - r4 still contains rPC]
   1430 #if defined(WITH_JIT_TUNING)
   1431     mov     r0, #kHeavyweightMonitor
   1432 #endif
   1433     ldr     pc, .LdvmJitToInterpNoChain
   1434 
   1435     .size   dvmCompilerTemplateStart, .-dvmCompilerTemplateStart
   1436 /* File: armv5te/footer.S */
   1437 /*
   1438  * ===========================================================================
   1439  *  Common subroutines and data
   1440  * ===========================================================================
   1441  */
   1442 
   1443     .text
   1444     .align  2
   1445 .LinvokeNative:
   1446     @ Prep for the native call
   1447     @ r1 = newFP, r0 = methodToCall
   1448     ldr     r3, [rGLUE, #offGlue_self]      @ r3<- glue->self
   1449     mov     r2, #0
   1450     ldr     r9, [r3, #offThread_jniLocal_topCookie] @ r9<- thread->localRef->...
   1451     str     r2, [r3, #offThread_inJitCodeCache] @ not in jit code cache
   1452     str     r1, [r3, #offThread_curFrame]   @ self->curFrame = newFp
   1453     str     r9, [r1, #(offStackSaveArea_localRefCookie - sizeofStackSaveArea)]
   1454                                         @ newFp->localRefCookie=top
   1455     mov     r9, r3                      @ r9<- glue->self (preserve)
   1456     SAVEAREA_FROM_FP(r10, r1)           @ r10<- new stack save area
   1457 
   1458     mov     r2, r0                      @ r2<- methodToCall
   1459     mov     r0, r1                      @ r0<- newFP
   1460     add     r1, rGLUE, #offGlue_retval  @ r1<- &retval
   1461     @ NOTE(review): LDR_PC_LR presumably sets lr and jumps to nativeFunc
   1462     LDR_PC_LR "[r2, #offMethod_nativeFunc]"
   1463 
   1464     @ Refresh Jit's on/off status
   1465     ldr     r3, [rGLUE, #offGlue_ppJitProfTable]
   1466 
   1467     @ native return; r9=self, r10=newSaveArea
   1468     @ equivalent to dvmPopJniLocals
   1469     ldr     r2, [r10, #offStackSaveArea_returnAddr] @ r2 = chaining cell ret
   1470     ldr     r0, [r10, #offStackSaveArea_localRefCookie] @ r0<- saved->top
   1471     ldr     r1, [r9, #offThread_exception] @ check for exception
   1472     ldr     r3, [r3]    @ r3 <- pointer to Jit profile table
   1473     str     rFP, [r9, #offThread_curFrame]  @ self->curFrame = fp
   1474     cmp     r1, #0                      @ null?
   1475     str     r0, [r9, #offThread_jniLocal_topCookie] @ new top <- old top
   1476     ldr     r0, [r10, #offStackSaveArea_savedPc] @ reload rPC
   1477     str     r3, [rGLUE, #offGlue_pJitProfTable]  @ cache current JitProfTable
   1478 
   1479     @ r0 = dalvikCallsitePC
   1480     bne     .LhandleException           @ exception pending - handle it
   1481 
   1482     str     r2, [r9, #offThread_inJitCodeCache] @ set the new mode
   1483     cmp     r2, #0                      @ return chaining cell still exists?
   1484     bxne    r2                          @ yes - go ahead
   1485 
   1486     @ continue executing the next instruction through the interpreter
   1487     ldr     r1, .LdvmJitToInterpTraceSelectNoChain @ defined in footer.S
   1488     add     rPC, r0, #6                 @ reconstruct new rPC (advance 6 bytes)
   1489 #if defined(WITH_JIT_TUNING)
   1490     mov     r0, #kCallsiteInterpreted
   1491 #endif
   1492     mov     pc, r1
   1493 
   1494 /*
   1495  * Exception exit path. On entry:
   1496  * r0  Faulting Dalvik PC (moved into rPC before control leaves; r1-r3 and rIBASE are overwritten)
   1497  */
   1498 .LhandleException:
   1499 #if defined(WITH_SELF_VERIFICATION)
   1500     ldr     pc, .LdeadFood @ should not see this under self-verification mode: load pc with the bogus word below
   1501 .LdeadFood:
   1502     .word   0xdeadf00d                  @ recognizable poison value loaded into pc above
   1503 #endif
   1504     ldr     r3, [rGLUE, #offGlue_self]  @ r3<- glue->self
   1505     mov     r2, #0                      @ r2<- 0, the value stored below
   1506     str     r2, [r3, #offThread_inJitCodeCache] @ self->inJitCodeCache = 0: in interpreter land
   1507     ldr     r1, .LdvmMterpCommonExceptionThrown @ PIC way of getting &func: read it from the literal word
   1508     ldr     rIBASE, .LdvmAsmInstructionStart    @ same as above: rIBASE<- &dvmAsmInstructionStart
   1509     mov     rPC, r0                 @ reload the faulting Dalvik address
   1510     mov     pc, r1                  @ branch to dvmMterpCommonExceptionThrown
   1511 
   1512     .align  2                           @ literal words below start on a 4-byte (2^2) boundary
   1513 .LdvmAsmInstructionStart:               @ loaded into rIBASE by .LhandleException
   1514     .word   dvmAsmInstructionStart
   1515 .LdvmJitToInterpNoChainNoProfile:       @ address literal for dvmJitToInterpNoChainNoProfile
   1516     .word   dvmJitToInterpNoChainNoProfile
   1517 .LdvmJitToInterpTraceSelectNoChain:     @ address literal; the native-return path above jumps through it
   1518     .word   dvmJitToInterpTraceSelectNoChain
   1519 .LdvmJitToInterpNoChain:                @ address literal for dvmJitToInterpNoChain
   1520     .word   dvmJitToInterpNoChain
   1521 .LdvmMterpStdBail:                      @ address literal for dvmMterpStdBail
   1522     .word   dvmMterpStdBail
   1523 .LdvmMterpCommonExceptionThrown:        @ jump target used by .LhandleException
   1524     .word   dvmMterpCommonExceptionThrown
   1525 .LdvmLockObject:                        @ address literal for dvmLockObject
   1526     .word   dvmLockObject
   1527 #if defined(WITH_JIT_TUNING)
   1528 .LdvmICHitCount:                        @ address literal for gDvmICHitCount (tuning builds only)
   1529     .word   gDvmICHitCount
   1530 #endif /* WITH_JIT_TUNING */
   1531 #if defined(WITH_SELF_VERIFICATION)
   1532 .LdvmSelfVerificationMemOpDecode:       @ address literal, present only in self-verification builds
   1533     .word   dvmSelfVerificationMemOpDecode
   1534 #endif /* WITH_SELF_VERIFICATION */
   1535 .L__aeabi_cdcmple:                      @ address literal for __aeabi_cdcmple
   1536     .word   __aeabi_cdcmple
   1537 .L__aeabi_cfcmple:                      @ address literal for __aeabi_cfcmple
   1538     .word   __aeabi_cfcmple
   1539 
   1540     .global dmvCompilerTemplateEnd      @ exported label placed after the literal words above
   1541 dmvCompilerTemplateEnd:                 @ NOTE(review): presumably marks the end of the template region; spelled "dmv", and as a global symbol it must not be renamed here
   1542 
   1543 #endif /* WITH_JIT */
   1544 
   1545